Commit 5ebc405

Merge pull request #11 from mjanowiecki/master
ASpace & readme updates
2 parents db10e49 + acebb2c commit 5ebc405

File tree

3 files changed (+181, -6 lines)

README.md

Lines changed: 5 additions & 1 deletion
@@ -7,6 +7,7 @@ All of these scripts require a secrets.py file in the same directory that must c
 baseURL='[ArchivesSpace API URL]'
 user='[user name]'
 password='[password]'
+repository='[repository]'
 
 This secrets.py file will be ignored according to the repository's .gitignore file so that ArchivesSpace login details will not be inadvertently exposed through GitHub.
 
@@ -39,7 +40,7 @@ Retrieves a count of archival objects associated with a particular resource. Upo
 Extracts all of the archival objects associated with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
 
 #### [getArchivalObjectRefIdsForResource.py](/getArchivalObjectRefIdsForResource.py)
-Extracts the title, URI, ref_id, and date expression for all archival objects associated with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
+Extracts the title, URI, ref_id, date expression, and level for all archival objects associated with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
 
 #### [getArrayPropertiesFromAgentsPeopleCSV.py](/getArrayPropertiesFromAgentsPeopleCSV.py)
 Retrieves specific properties, including properties that have arrays as values, from the JSON of ArchivesSpace agent_people records. In this example, the 'dates_of_existence' property contains an array that must be iterated over. This requires a second level of iteration, 'for j in range(...)' on line 20, in addition to the 'for i in range(...)' loop on line 19 that also appears in the getPropertiesFromAgentsPeopleCSV.py script. As with the previous script, it writes the properties' values into a CSV file specified in the variable 'f' on line 17.
@@ -107,6 +108,9 @@ Prints the URIs to a CSV file of all resources in a repository without a bib num
 #### [searchForUnassociatedContainers.py](/searchForUnassociatedContainers.py)
 Prints the URIs to a CSV file of all top containers that are not associated with a resource or archival object.
 
+#### [transferAoDatesToDos.py](/transferAoDatesToDos.py)
+Transfers the date from an archival object to any attached digital objects.
+
 #### [unpublishArchivalObjectsByResource.py](/unpublishArchivalObjectsByResource.py)
 Unpublishes all archival objects associated with the specified resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
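
For illustration, a hypothetical filled-in secrets.py could look like the sketch below. The URL and credentials are placeholders, not real values; the only non-obvious point, grounded in the scripts themselves, is that repository holds the numeric repository ID (as a string) that gets spliced into the /repositories/<id>/ endpoints.

# secrets.py -- hypothetical example values; substitute your own instance's details
baseURL = 'https://aspace.example.edu/api'  # ArchivesSpace backend API URL (placeholder)
user = 'apiuser'                            # placeholder
password = 'change-me'                      # placeholder
repository = '2'                            # numeric repository ID, used in /repositories/2/... calls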

getArchivalObjectRefIdsForResource.py

Lines changed: 10 additions & 5 deletions
@@ -37,7 +37,7 @@ def findKey(d, key):
 resourceID= raw_input('Enter resource ID: ')
 
 f=csv.writer(open('archivalObjectRefIdForResource.csv', 'wb'))
-f.writerow(['title']+['uri']+['ref_id']+['date'])
+f.writerow(['title']+['uri']+['ref_id']+['dateExpression']+['dateBegin']+['level'])
 
 endpoint = '/repositories/'+repository+'/resources/'+resourceID+'/tree'
 
@@ -51,16 +51,21 @@ def findKey(d, key):
 print 'downloading aos'
 for archivalObject in archivalObjects:
     output = requests.get(baseURL + archivalObject, headers=headers).json()
-    print output
+    print json.dumps(output)
     title = output['title']
     uri = output['uri']
     ref_id = output['ref_id']
+    level = output['level']
     for date in output['dates']:
         try:
-            date = date['expression']
+            dateExpression = date['expression']
         except:
-            date = ''
-    f.writerow([title]+[uri]+[ref_id]+[date])
+            dateExpression = ''
+        try:
+            dateBegin = date['begin']
+        except:
+            dateBegin = ''
+    f.writerow([title]+[uri]+[ref_id]+[dateExpression]+[dateBegin]+[level])
 
 elapsedTime = time.time() - startTime
 m, s = divmod(elapsedTime, 60)
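
The try/except pairs in the new lines exist because an ArchivesSpace date sub-record does not have to carry every field: 'expression', 'begin', and 'end' are each optional. As a purely illustrative sketch (placeholder values, not taken from a real record), the 'dates' array on an archival object can mix fuller and sparser entries:

# Illustrative only -- shows why date['expression'] or date['begin'] may raise KeyError.
dates = [
    {'label': 'creation', 'date_type': 'inclusive',
     'expression': '1920-1935', 'begin': '1920', 'end': '1935'},
    {'label': 'creation', 'date_type': 'single', 'begin': '1942'},  # no 'expression' key
]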

transferAoDatesToDos.py

Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
import json
import requests
import secrets
import time
import csv

# Choose which ArchivesSpace instance to edit: a named secrets file for the
# production server, or the default secrets.py for development.
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
if secretsVersion != '':
    try:
        secrets = __import__(secretsVersion)
        print 'Editing Production'
    except ImportError:
        print 'Editing Development'
else:
    print 'Editing Development'

startTime = time.time()

baseURL = secrets.baseURL
user = secrets.user
password = secrets.password
repository = secrets.repository

# Authenticate and store the session token in the headers used by every API call.
auth = requests.post(baseURL + '/users/'+user+'/login?password='+password).json()
session = auth["session"]
headers = {'X-ArchivesSpace-Session': session, 'Content-Type': 'application/json'}
print 'authenticated'

endpoint = '/repositories/'+repository+'/archival_objects?all_ids=true'

ids = requests.get(baseURL + endpoint, headers=headers).json()
ids.reverse()
print len(ids)

## Generates a text file of AOs with DOs. It takes 2+ hours to run, so this block
## is kept separate (commented out) so the main portion of the script can be run
## more quickly.

# f = csv.writer(open('archivalObjectsWithDigitalObjects.csv', 'wb'))
# f.writerow(['uri'])
# doAos = []
#
# for id in ids:
#     endpoint = '/repositories/'+repository+'/archival_objects/'+str(id)
#     output = requests.get(baseURL + endpoint, headers=headers).json()
#     try:
#         dates = output['dates']
#     except:
#         dates = ''
#     uri = output['uri']
#     instances = output['instances']
#     for instance in instances:
#         if instance['instance_type'] == 'digital_object':
#             doUri = instance['digital_object']['ref']
#             print doUri
#             f.writerow([uri])
#             doAos.append(uri)
#
# f2 = open('archivalObjectsWithDigitalObjectsList.txt', 'wb')
# f2.write(json.dumps(doAos))

# Log every digital object that is edited: its old date values, the archival
# object's values that were applied, and the API response.
f = csv.writer(open('DigitalObjectsDatesEdited.csv', 'wb'))
f.writerow(['doUri']+['oldBegin']+['oldEnd']+['oldExpression']+['oldLabel']+['aoUri']+['newBegin']+['newEnd']+['newExpression']+['newLabel']+['post'])

# Work through the previously generated list of archival objects that have
# digital object instances.
doAos = json.load(open('archivalObjectsWithDigitalObjectsList.txt', 'rb'))
for doAo in doAos:
    print doAo
    aoBegin = ''
    aoExpression = ''
    aoLabel = ''
    aoEnd = ''
    doBegin = ''
    doExpression = ''
    doLabel = ''
    doEnd = ''
    aoOutput = requests.get(baseURL + doAo, headers=headers).json()
    # Pull whatever date fields the archival object carries; each one is optional.
    try:
        aoDates = aoOutput['dates']
        for aoDate in aoDates:
            try:
                aoBegin = aoDate['begin']
            except:
                aoBegin = ''
            try:
                aoEnd = aoDate['end']
            except:
                aoEnd = ''
            try:
                aoExpression = aoDate['expression']
            except:
                aoExpression = ''
            try:
                aoLabel = aoDate['label']
            except:
                aoLabel = ''
    except:
        aoBegin = ''
        aoExpression = ''
        aoLabel = ''
        aoEnd = ''
    try:
        instances = aoOutput['instances']
    except:
        continue
    for instance in instances:
        if instance['instance_type'] == 'digital_object':
            # Only edit the digital object if the archival object has date information.
            if aoBegin+aoExpression+aoLabel != '':
                doUri = instance['digital_object']['ref']
                doOutput = requests.get(baseURL + str(doUri), headers=headers).json()
                print 'moving date from AO to DO'
                doDates = doOutput['dates']
                if doDates == []:
                    # The digital object has no dates: build a new date record
                    # from the archival object's values and post it.
                    print 'no date', doDates
                    doBegin = ''
                    doExpression = ''
                    doLabel = ''
                    doEnd = ''
                    doDates = []
                    doDate = {}
                    doDate['begin'] = aoBegin
                    doDate['expression'] = aoExpression
                    doDate['label'] = aoLabel
                    doDate['date_type'] = 'single'
                    if aoEnd != '':
                        doDate['end'] = aoEnd
                        doDate['date_type'] = 'range'
                    doDates.append(doDate)
                    doOutput['dates'] = doDates
                    output = json.dumps(doOutput)
                    doPost = requests.post(baseURL + doUri, headers=headers, data=output).json()
                    print doPost
                else:
                    # The digital object already has dates: record the existing
                    # values for the log, then overwrite each field the archival
                    # object supplies.
                    print 'existing date', doDates
                    for doDate in doDates:
                        try:
                            doBegin = doDate['begin']
                        except:
                            doBegin = ''
                        try:
                            doEnd = doDate['end']
                        except:
                            doEnd = ''
                        try:
                            doExpression = doDate['expression']
                        except:
                            doExpression = ''
                        try:
                            doLabel = doDate['label']
                        except:
                            doLabel = ''
                        if aoBegin != '':
                            doDate['begin'] = aoBegin
                        if aoExpression != '':
                            doDate['expression'] = aoExpression
                        if aoLabel != '':
                            doDate['label'] = aoLabel
                        if aoEnd != '':
                            doDate['end'] = aoEnd
                    doOutput['dates'] = doDates
                    output = json.dumps(doOutput)
                    doPost = requests.post(baseURL + doUri, headers=headers, data=output).json()
                    print doPost
                f.writerow([doUri]+[doBegin]+[doEnd]+[doExpression]+[doLabel]+[doAo]+[aoBegin]+[aoEnd]+[aoExpression]+[aoLabel]+[doPost])

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)
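
To make the core transformation easier to see at a glance, the date-building rule used in the empty-dates branch above can be restated as a small sketch (the helper name is hypothetical, not part of the script): the archival object's values become a 'single' date, promoted to a 'range' only when an end date is present.

# Hypothetical restatement of the script's empty-dates logic, for illustration only.
def buildDateFromAo(aoBegin, aoEnd, aoExpression, aoLabel):
    doDate = {'begin': aoBegin, 'expression': aoExpression,
              'label': aoLabel, 'date_type': 'single'}
    if aoEnd != '':
        doDate['end'] = aoEnd
        doDate['date_type'] = 'range'
    return doDate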
