Skip to content

Commit 99c2d61

Browse files
committed
post items and bitstreams (#11)
* post items and bitstreams * Update cli.py * Update cli.py
1 parent 881c50b commit 99c2d61

File tree

3 files changed

+148
-24
lines changed

3 files changed

+148
-24
lines changed

dsaps/cli.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import datetime
2+
import glob
3+
import json
24
import logging
5+
import os
36
import time
47

58
import click
@@ -69,15 +72,37 @@ def search(ctx, field, string, search_type):
6972
'collection.')
7073
@click.option('-n', '--coll_name', prompt='Enter the name of the collection',
7174
help='The name of the collection to be created.')
75+
@click.option('-m', '--metadata', prompt='Enter the path of the metadata file',
76+
help='The path of the JSON file of metadata.')
77+
@click.option('-f', '--file_path', prompt='Enter the path',
78+
help='The path of the content, a URL or local drive path.')
79+
@click.option('-t', '--file_type', prompt='Enter the file type',
80+
help='The file type to be uploaded.')
81+
@click.option('-i', '--ingest_type', prompt='Enter the type of ingest',
82+
help='The type of ingest to perform: local, remote.',
83+
type=click.Choice(['local', 'remote']))
7284
@click.pass_context
73-
def newcoll(ctx, comm_handle, coll_name):
85+
def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type,
86+
ingest_type):
7487
client = ctx.obj['client']
75-
coll_id = client.post_coll_to_comm(comm_handle, coll_name)
76-
logger.info(coll_id)
77-
# STEPS TO ADD
78-
# post items to collections
79-
# post bitstreams to item_links
80-
# post prov notes
88+
start_time = ctx.obj['start_time']
89+
with open(metadata, encoding='UTF-8') as fp:
90+
coll_metadata = json.load(fp)
91+
coll_id = client.post_coll_to_comm(comm_handle, coll_name)
92+
file_dict = {}
93+
if ingest_type == 'local':
94+
files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
95+
for file in files:
96+
file_name = os.path.splitext(os.path.basename(file))[0]
97+
file_dict[file_name] = file
98+
elif ingest_type == 'remote':
99+
file_dict = models.build_file_dict_remote(file_path, file_type,
100+
file_dict)
101+
items = client.post_items_to_coll(coll_id, coll_metadata, file_dict,
102+
ingest_type)
103+
for item in items:
104+
logger.info(f'Item posted: {item}')
105+
models.elapsed_time(start_time, 'Total runtime:')
81106

82107

83108
if __name__ == '__main__':

dsaps/models.py

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import datetime
22
from functools import partial
33
import operator
4+
import os
45
import requests
56
import time
67

@@ -24,6 +25,7 @@ def __init__(self, url):
2425
logger.info('Initializing client')
2526

2627
def authenticate(self, email, password):
28+
"""Authenticate user to DSpace API."""
2729
header = self.header
2830
data = {'email': email, 'password': password}
2931
session = requests.post(f'{self.url}/login', headers=header,
@@ -54,6 +56,7 @@ def get_record(self, uuid, rec_type):
5456

5557
def filtered_item_search(self, key, string, query_type,
5658
selected_collections=''):
59+
"""Performs a search against the filtered items endpoint."""
5760
offset = 0
5861
items = ''
5962
item_links = []
@@ -63,10 +66,9 @@ def filtered_item_search(self, key, string, query_type,
6366
'query_val[]': string, '&collSel[]':
6467
selected_collections, 'limit': 200, 'offset': offset}
6568
logger.info(params)
66-
print(endpoint)
6769
response = requests.get(endpoint, headers=self.header,
6870
params=params, cookies=self.cookies)
69-
print(f'Response url: {response.url}')
71+
logger.info(f'Response url: {response.url}')
7072
response = response.json()
7173
items = response['items']
7274
for item in items:
@@ -75,6 +77,7 @@ def filtered_item_search(self, key, string, query_type,
7577
return item_links
7678

7779
def post_coll_to_comm(self, comm_handle, coll_name):
80+
"""Posts a collection to a specified community."""
7881
endpoint = f'{self.url}/handle/{comm_handle}'
7982
community = requests.get(endpoint, headers=self.header,
8083
cookies=self.cookies).json()
@@ -83,7 +86,52 @@ def post_coll_to_comm(self, comm_handle, coll_name):
8386
endpoint2 = f'{self.url}/communities/{comm_id}/collections'
8487
coll_id = requests.post(endpoint2, headers=self.header,
8588
cookies=self.cookies, json=collection).json()
86-
return coll_id['link']
89+
coll_id = coll_id['uuid']
90+
logger.info(f'Collection posted: {coll_id}')
91+
return coll_id
92+
93+
def post_items_to_coll(self, coll_id, coll_metadata, file_dict,
                       ingest_type):
    """Post items to a collection and upload each item's bitstreams.

    For every metadata record, the 'file_identifier' entry is removed from
    the record (it is only used to pair the record with its files and is
    not sent with the item) and the item is posted only when at least one
    key in file_dict starts with that identifier. The prefix test is the
    same one post_bitstreams_to_item uses, so an item is never created
    without a bitstream to attach.

    Args:
        coll_id: UUID of the collection that receives the items.
        coll_metadata: List of item records, each a dict whose 'metadata'
            value is a list of {'key': ..., 'value': ...} dicts. The
            records are modified in place.
        file_dict: Mapping of file identifiers to file paths or URLs.
        ingest_type: Passed through to post_bitstreams_to_item.

    Yields:
        The UUID of each item that was posted.
    """
    for item_metadata in coll_metadata:
        file_identifier = None
        file_exists = False
        # Iterate over a filtered copy so the matching entries can be
        # removed from the original list while looping.
        for element in [e for e in item_metadata['metadata']
                        if e['key'] == 'file_identifier']:
            file_identifier = element['value']
            item_metadata['metadata'].remove(element)
            if any(name.startswith(file_identifier) for name in file_dict):
                file_exists = True
        if not file_exists:
            logger.warning('No files found for item, skipping: '
                           f'{file_identifier}')
            continue
        endpoint = f'{self.url}/collections/{coll_id}/items'
        response = requests.post(endpoint, headers=self.header,
                                 cookies=self.cookies, json=item_metadata)
        item_id = response.json()['uuid']
        bit_ids = self.post_bitstreams_to_item(item_id, file_identifier,
                                               file_dict, ingest_type)
        # post_bitstreams_to_item is a generator; iterating it here is what
        # actually performs the uploads.
        for bit_id in bit_ids:
            logger.info(f'Bitstream posted: {bit_id}')
        yield item_id
116+
117+
def post_bitstreams_to_item(self, item_id, file_identifier, file_dict,
                            ingest_type):
    """Post every bitstream belonging to an item.

    A file belongs to the item when its key in file_dict starts with
    file_identifier.

    Args:
        item_id: UUID of the item that receives the bitstreams.
        file_identifier: Prefix used to select entries of file_dict.
        file_dict: Mapping of file identifiers to file paths ('local') or
            URLs ('remote').
        ingest_type: 'local' to read the file from disk, 'remote' to fetch
            it over HTTP first.

    Yields:
        The UUID of each bitstream that was posted.

    Raises:
        ValueError: If ingest_type is neither 'local' nor 'remote'.
    """
    header_upload = {'accept': 'application/json'}
    for name, bitstream in file_dict.items():
        if not name.startswith(file_identifier):
            continue
        file_name = os.path.basename(bitstream)
        endpoint = (f'{self.url}/items/{item_id}'
                    + f'/bitstreams?name={file_name}')
        if ingest_type == 'local':
            # Keep the handle open only for the duration of the upload so
            # it is closed even if the request raises.
            with open(bitstream, 'rb') as data:
                response = requests.post(endpoint, headers=header_upload,
                                         cookies=self.cookies, data=data)
        elif ingest_type == 'remote':
            # NOTE(review): the Response object itself is handed to
            # requests as the upload body, and the download status is not
            # checked -- confirm this is the intended transfer mode.
            data = requests.get(bitstream)
            response = requests.post(endpoint, headers=header_upload,
                                     cookies=self.cookies, data=data)
        else:
            raise ValueError(f'Unsupported ingest type: {ingest_type}')
        yield response.json()['uuid']
87135

88136
def _pop_inst(self, class_type, rec_obj):
89137
"""Populate class instance with data from record."""
@@ -100,6 +148,7 @@ def _pop_inst(self, class_type, rec_obj):
100148
return rec_obj
101149

102150
def _build_uuid_list(self, rec_obj, children):
151+
"""Builds a list of the uuids for an object's children."""
103152
child_list = []
104153
for child in rec_obj[children]:
105154
child_list.append(child['uuid'])
@@ -138,15 +187,14 @@ class MetadataEntry(BaseRecord):
138187
language = Field()
139188

140189

141-
def build_file_dict_remote(directory_url, file_type, file_dict):
    """Build a dict of the files linked from a remote directory page.

    Args:
        directory_url: URL of the directory listing; link targets are
            appended to it verbatim, so it should end with a slash.
        file_type: File extension to select, without the leading dot.
        file_dict: Dict to populate. It is updated in place and returned.

    Returns:
        file_dict, with one entry per matching link: the link target minus
        its extension mapped to the file's full URL.
    """
    suffix = f'.{file_type}'
    response = requests.get(directory_url)
    links = html.fromstring(response.content).iterlinks()
    for link in links:
        # iterlinks() yields tuples whose third member is the link target.
        target = link[2]
        # Require the dot so 'pdf' does not match a name like 'notapdf'.
        if not target.endswith(suffix):
            continue
        # Strip only the trailing extension; replace() would also remove
        # the same text from the middle of the name.
        file_identifier = target[:-len(suffix)]
        file_dict[file_identifier] = f'{directory_url}{target}'
    return file_dict
150198

151199

152200
def elapsed_time(start_time, label):

tests/test_models.py

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,16 @@ def client():
1414
return client
1515

1616

17+
@pytest.fixture
def sample_content(tmp_path):
    """Create a small file named 123.pdf in a temp subdirectory.

    Returns the path of the created file.
    """
    # Avoid the names `dir` (a builtin) and `sample_content` (this fixture)
    # for locals.
    sub_dir = tmp_path / 'sub'
    sub_dir.mkdir()
    sample_file = sub_dir / '123.pdf'
    sample_file.write_text('test')
    return sample_file
25+
26+
1727
def test_authenticate(client):
1828
"""Test authenticate function."""
1929
with requests_mock.Mocker() as m:
@@ -55,18 +65,58 @@ def test_filtered_item_search(client):
5565

5666

5767
def test_post_coll_to_comm(client):
    """Test post_coll_to_comm function."""
    community_json = {'uuid': 'a1b2'}
    collection_json = {'uuid': '5678'}
    with requests_mock.Mocker() as mocker:
        # The community is looked up by handle, then the collection is
        # posted to that community's collections endpoint.
        mocker.get('mock://example.com/handle/1234', json=community_json)
        mocker.post('mock://example.com/communities/a1b2/collections',
                    json=collection_json)
        coll_id = client.post_coll_to_comm('1234', 'Test Collection')
        assert coll_id == '5678'
6879

6980

81+
def test_post_items_to_coll(client, sample_content):
    """Test post_items_to_coll function."""
    with requests_mock.Mocker() as m:
        coll_metadata = [{"metadata": [
            {"key": "file_identifier",
             "value": "123"},
            {"key": "dc.title", "value":
             "Monitoring Works: Getting Teachers",
             "language": "en_US"}]}]
        coll_id = '789'
        ingest_type = 'local'
        file_dict = {'123': sample_content}
        json_object_1 = {'uuid': 'a1b2'}
        m.post('mock://example.com/collections/789/items', json=json_object_1)
        url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf'
        json_object_2 = {'uuid': 'c3d4'}
        m.post(url, json=json_object_2)
        item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict,
                                             ingest_type)
        # Materialize the generator so the test fails if nothing is
        # yielded; asserting inside a for loop would pass vacuously.
        assert list(item_ids) == ['a1b2']
102+
103+
104+
def test_post_bitstreams_to_item(client, sample_content):
    """Test post_bitstreams_to_item function."""
    with requests_mock.Mocker() as m:
        item_id = 'a1b2'
        ingest_type = 'local'
        file_identifier = '123'
        file_dict = {'123': sample_content}
        json_object_1 = {'uuid': 'c3d4'}
        url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf'
        m.post(url, json=json_object_1)
        bit_ids = client.post_bitstreams_to_item(item_id, file_identifier,
                                                 file_dict, ingest_type)
        # Materialize the generator so the test fails if nothing is
        # yielded; asserting inside a for loop would pass vacuously.
        assert list(bit_ids) == ['c3d4']
118+
119+
70120
def test__pop_inst(client):
71121
"""Test _pop_inst function."""
72122
class_type = models.Collection
@@ -84,17 +134,18 @@ def test__build_uuid_list(client):
84134
assert '1234' in child_list
85135

86136

87-
def test_build_file_dict_remote():
    """Test build_file_dict_remote function."""
    content = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"><html>'
    content += '<head><title>Index of /pdf</title></head><body><h1>Index of /'
    content += 'pdf</h1><table><tr><th>Name</th><th>Last modified</th><th>'
    content += 'Size</th></tr><tr><td><a href="999.pdf">999.pdf</a></td><td>'
    content += '2001-02-16 11:59 </td><td>107K</td></tr></table></body></html>'
    with requests_mock.Mocker() as m:
        directory_url = 'mock://test.com/pdfs/'
        file_type = 'pdf'
        m.get(directory_url, text=content)
        file_dict = models.build_file_dict_remote(directory_url, file_type,
                                                  {})
        # Pin the whole mapping: the key has the extension stripped and
        # the value is the directory URL joined with the link target.
        assert file_dict == {'999': 'mock://test.com/pdfs/999.pdf'}

0 commit comments

Comments
 (0)