Skip to content

Commit aeabc2d

Browse files
committed
PR updates
1 parent 59550f4 commit aeabc2d

File tree

12 files changed

+250
-358
lines changed

12 files changed

+250
-358
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,5 +180,6 @@ local/*
180180
*.csv
181181
!tests/files/*.csv
182182
*.json
183+
!config/*.json
183184
createItemMetadataFromCSV_*
184185
*.txt

config/aspace_mapping.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"file_identifier": {
3+
"csv_field_name": "file_identifier",
4+
"language": null,
5+
"delimiter": ""
6+
},
7+
"dc.title": {
8+
"csv_field_name": "title",
9+
"language": "en_US",
10+
"delimiter": ""
11+
},
12+
"dc.relation.isversionof": {
13+
"csv_field_name": "uri",
14+
"language": null,
15+
"delimiter": ""
16+
}
17+
}

config/standard_mapping.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"file_identifier": {
3+
"csv_field_name": "file_identifier",
4+
"language": null,
5+
"delimiter": ""
6+
},
7+
"dc.title": {
8+
"csv_field_name": "title",
9+
"language": "en_US",
10+
"delimiter": ""
11+
},
12+
"dc.relation.isversionof": {
13+
"csv_field_name": "uri",
14+
"language": null,
15+
"delimiter": ""
16+
},
17+
"dc.contributor.author": {
18+
"csv_field_name": "authors",
19+
"language": null,
20+
"delimiter": "|"
21+
}
22+
}

dsaps/cli.py

Lines changed: 34 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
logger = structlog.get_logger()
1515

1616

17-
@click.group()
17+
@click.group(chain=True)
1818
@click.option('--url', envvar='DSPACE_URL')
1919
@click.option('-e', '--email', envvar='TEST_EMAIL',
2020
help='The email of the user for authentication.')
@@ -50,9 +50,6 @@ def main(ctx, url, email, password):
5050

5151

5252
@main.command()
53-
@click.option('-c', '--collection-handle', required=True,
54-
help='The handle of the collection to which items are being '
55-
'added.')
5653
@click.option('-m', '--metadata-csv', required=True,
5754
help='The full path to the CSV file of metadata for the items.')
5855
@click.option('--field-map', required=True,
@@ -66,64 +63,50 @@ def main(ctx, url, email, password):
6663
@click.option('-r', '--ingest-report', is_flag=True,
6764
help='Create ingest report for updating other systems.')
6865
@click.pass_context
69-
def additems(ctx, collection_handle, metadata_csv, field_map,
70-
directory, file_type, ingest_report):
66+
def additems(ctx, metadata_csv, field_map, directory, file_type,
67+
ingest_report):
7168
client = ctx.obj['client']
7269
start_time = ctx.obj['start_time']
70+
collection_uuid = ctx.obj['collection_uuid']
7371
with open(metadata_csv, 'r') as csvfile, open(field_map, 'r') as jsonfile:
7472
metadata = csv.DictReader(csvfile)
7573
mapping = json.load(jsonfile)
7674
collection = Collection.from_csv(metadata, mapping)
7775
for item in collection.items:
7876
item.bitstreams_from_directory(directory, file_type)
79-
collection_uuid = client.get_id_from_handle(collection_handle)
80-
collection.handle = collection_handle
8177
collection.uuid = collection_uuid
82-
collection.post_items(client)
78+
items = collection.post_items(client)
79+
if ingest_report:
80+
report_name = metadata_csv.replace('.csv', '-ingest.csv')
81+
helpers.create_ingest_report(items, report_name)
8382
helpers.elapsed_time(start_time, 'Total runtime:')
8483

85-
#
86-
# @main.command()
87-
# @click.option('-c', '--comm_handle', prompt='Enter the community handle',
88-
# help='The handle of the community in which to create the ,'
89-
# 'collection.')
90-
# @click.option('-n', '--coll_name', prompt='Enter the name of the collection',
91-
# help='The name of the collection to be created.')
92-
# @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
93-
# help='The path of the CSV file of metadata.')
94-
# @click.option('-f', '--file_path', prompt='Enter the path',
95-
# help='The path of the content, a URL or local drive path.')
96-
# @click.option('-t', '--file_type', prompt='Enter the file type',
97-
# help='The file type to be uploaded.')
98-
# @click.option('-i', '--ingest_type', prompt='Enter the type of ingest',
99-
# help='The type of ingest to perform: local, remote.',
100-
# type=click.Choice(['local', 'remote']), default='remote')
101-
# @click.option('-r', '--ingest_report', prompt='Create an ingest report?',
102-
# help='Create ingest report for updating other systems',
103-
# default=False)
104-
# @click.option('-u', '--multiple_terms', prompt='Method of separating terms?',
105-
# help='The way multiple terms are separated in the metadata CSV.',
106-
# type=click.Choice(['delimited', 'num_columns']),
107-
# default='delimited')
108-
# @click.pass_context
109-
# def newcoll(ctx, comm_handle, coll_name, metadata_csv, file_path, file_type,
110-
# ingest_type, ingest_report, multiple_terms):
111-
# client = ctx.obj['client']
112-
# start_time = ctx.obj['start_time']
113-
# ingest_data = {}
114-
# json_metadata = metadata.create_json_metadata(metadata_csv, multiple_terms)
115-
# items = workflows.populate_new_coll(client, comm_handle, coll_name,
116-
# ingest_type, file_path, file_type,
117-
# json_metadata, ingest_report,
118-
# ingest_data)
119-
# for item in items:
120-
# logger.info(f'Item posted: {item}')
121-
# if ingest_report == 'True':
122-
# report_name = metadata_csv.replace('.csv', '-ingest.csv')
123-
# helpers.create_ingest_report(ingest_data, report_name)
124-
# helpers.elapsed_time(start_time, 'Total runtime:')
125-
#
126-
#
84+
85+
@main.command()
86+
@click.option('-c', '--collection-handle', required=True,
87+
help='The handle of the collection to which items are being '
88+
'added.')
89+
@click.pass_context
90+
def existingcollection(ctx, collection_handle):
91+
client = ctx.obj['client']
92+
collection_uuid = client.get_id_from_handle(collection_handle)
93+
ctx.obj['collection_uuid'] = collection_uuid
94+
95+
96+
@main.command()
97+
@click.option('-c', '--community-handle', required=True,
98+
help='The handle of the community in which to create the '
99+
'collection.')
100+
@click.option('-n', '--collection-name', required=True,
101+
help='The name of the collection to be created.')
102+
@click.pass_context
103+
def newcollection(ctx, community_handle, collection_name):
104+
client = ctx.obj['client']
105+
collection_uuid = client.post_coll_to_comm(community_handle,
106+
collection_name)
107+
ctx.obj['collection_uuid'] = collection_uuid
108+
109+
127110
# @main.command()
128111
# @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
129112
# help='The path of the CSV file of metadata.')

dsaps/helpers.py

Lines changed: 12 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,15 @@
1-
import collections
21
import csv
32
import datetime
43
import glob
54
import os
6-
import requests
75
import time
86

9-
from lxml import html
107
import structlog
118

129

1310
logger = structlog.get_logger()
1411

1512

16-
1713
def create_csv_from_list(list_name, output):
1814
"""Creates CSV file from list content."""
1915
with open(f'{output}.csv', 'w') as csvfile:
@@ -25,24 +21,23 @@ def create_csv_from_list(list_name, output):
2521

2622
def create_file_dict(file_path, file_type):
2723
"""Creates a dict of file IDs and file paths."""
28-
if file_path.startswith('http'):
29-
file_dict = build_file_dict_remote(file_path, file_type, {})
30-
else:
31-
files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
32-
file_dict = {}
33-
for file in files:
34-
file_name = os.path.splitext(os.path.basename(file))[0]
35-
file_dict[file_name] = file
24+
files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
25+
file_dict = {}
26+
for file in files:
27+
file_name = os.path.splitext(os.path.basename(file))[0]
28+
file_dict[file_name] = file
3629
return file_dict
3730

3831

39-
def create_ingest_report(ingest_data, file_name):
40-
"""Creates ingest report of handles and DOS links."""
41-
with open(f'{file_name}.csv', 'w') as writecsv:
32+
def create_ingest_report(items, file_name):
33+
"""Creates ingest report of other systems' identifiers with a newly created
34+
DSpace handle."""
35+
with open(f'{file_name}', 'w') as writecsv:
4236
writer = csv.writer(writecsv)
4337
writer.writerow(['uri'] + ['link'])
44-
for uri, handle in ingest_data.items():
45-
writer.writerow([uri] + [f'https://hdl.handle.net/{handle}'])
38+
for item in items:
39+
writer.writerow([item.source_system_identifier]
40+
+ [f'https://hdl.handle.net/{item.handle}'])
4641

4742

4843
def create_metadata_id_list(metadata_csv):
@@ -81,22 +76,6 @@ def match_metadata_to_files(file_dict, metadata_ids):
8176
return metadata_matches
8277

8378

84-
def select_bitstreams(ingest_type, file_dict, file_identifier):
85-
"""Select the appropriate bitstreams for posting to an item."""
86-
sel_bitstreams = []
87-
file_dict = collections.OrderedDict(sorted(file_dict.items()))
88-
for k in [e for e in file_dict if e.startswith(file_identifier)]:
89-
pass
90-
for bitstream_id in [k for k, v in file_dict.items()
91-
if k.startswith(file_identifier)]:
92-
if ingest_type == 'local':
93-
data = open(file_dict[bitstream_id], 'rb')
94-
elif ingest_type == 'remote':
95-
data = requests.get(file_dict[bitstream_id]).content
96-
sel_bitstreams.append(data)
97-
return sel_bitstreams
98-
99-
10079
def update_metadata_csv(metadata_csv, output_path, metadata_matches):
10180
"""Creates an updated CSV of metadata records with matching files."""
10281
with open(metadata_csv) as csvfile:

dsaps/models.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
import glob
33
import operator
44
import os
5-
import structlog
65

76
import attr
87
import requests
8+
import structlog
99

1010
Field = partial(attr.ib, default=None)
1111
Group = partial(attr.ib, default=[])
@@ -111,10 +111,11 @@ def post_coll_to_comm(self, comm_handle, coll_name):
111111
def post_item_to_collection(self, collection_id, item):
112112
"""Posts item to a specified collection and returns the item ID."""
113113
endpoint = f'{self.url}/collections/{collection_id}/items'
114-
post_response = requests.post(endpoint, headers=self.header,
115-
cookies=self.cookies,
116-
json=attr.asdict(item)['metadata']).json()
114+
post_response = requests.post(
115+
endpoint, headers=self.header, cookies=self.cookies,
116+
json={'metadata': attr.asdict(item)['metadata']}).json()
117117
item_id = post_response['uuid']
118+
item.handle = post_response['handle']
118119
return item_id
119120

120121
def _pop_inst(self, class_type, rec_obj):
@@ -155,14 +156,17 @@ class Collection(BaseRecord):
155156
def post_items(self, client):
156157
for item in self.items:
157158
item_id = client.post_item_to_collection(self.uuid, item)
159+
item.uuid = item_id
160+
logger.info(f'Item posted: {item_id}')
158161
for bitstream in item.bitstreams:
159-
client.post_bitstream(item_id, bitstream)
160-
yield item, bitstream
162+
bitstream_id = client.post_bitstream(item_id, bitstream)
163+
logger.info(f'Bitstream posted: {bitstream_id}')
164+
yield item
161165

162166
@classmethod
163167
def from_csv(cls, csv_reader, field_map):
164168
items = [
165-
Item.metadata_from_row(row, field_map) for row in csv_reader
169+
Item.from_row(row, field_map) for row in csv_reader
166170
]
167171
return cls(items=items)
168172

@@ -176,23 +180,33 @@ class Community(BaseRecord):
176180
class Item(BaseRecord):
177181
metadata = Group()
178182
bitstreams = Group()
183+
file_identifier = Field()
184+
source_system_identifier = Field()
179185

180186
def bitstreams_from_directory(self, directory, file_type='*'):
181-
file_identifier = [m.value for m in self.metadata if
182-
m.key == 'file_identifier'][0]
183-
file_list = glob.iglob(
184-
f'{directory}/**/{file_identifier}*.{file_type}', recursive=True
187+
files = glob.iglob(
188+
f'{directory}/**/{self.file_identifier}*.{file_type}',
189+
recursive=True
185190
)
191+
file_list = []
192+
for file in files:
193+
file_list.append(file)
194+
file_list.sort()
186195
self.bitstreams = [
187196
Bitstream(name=os.path.basename(f),
188197
file_path=f) for f in file_list
189198
]
190199

191200
@classmethod
192-
def metadata_from_row(cls, row, field_map):
201+
def from_row(cls, row, field_map):
193202
metadata = []
194203
for f in field_map:
195204
field = row[field_map[f]['csv_field_name']]
205+
if f == 'file_identifier':
206+
file_identifier = field
207+
continue # file_identifier is not included in DSpace metadata
208+
if f == 'dc.relation.isversionof':
209+
source_system_identifier = field
196210
delimiter = field_map[f]['delimiter']
197211
language = field_map[f]['language']
198212
if delimiter:
@@ -204,7 +218,8 @@ def metadata_from_row(cls, row, field_map):
204218
metadata.append(
205219
MetadataEntry(key=f, value=field, language=language)
206220
)
207-
return cls(metadata=metadata)
221+
return cls(metadata=metadata, file_identifier=file_identifier,
222+
source_system_identifier=source_system_identifier)
208223

209224

210225
@attr.s

0 commit comments

Comments
 (0)