Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions dsaps/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import datetime
import glob
import json
import logging
import os
import time

import click
Expand Down Expand Up @@ -69,15 +72,37 @@ def search(ctx, field, string, search_type):
'collection.')
@click.option('-n', '--coll_name', prompt='Enter the name of the collection',
help='The name of the collection to be created.')
@click.option('-m', '--metadata', prompt='Enter the path of the metadata file',
help='The path of the JSON file of metadata.')
@click.option('-f', '--file_path', prompt='Enter the path',
help='The path of the content, a URL or local drive path.')
@click.option('-t', '--file_type', prompt='Enter the file type',
help='The file type to be uploaded.')
@click.option('-i', '--ingest_type', prompt='Enter the type of ingest',
help='The type of ingest to perform: local, remote.',
type=click.Choice(['local', 'remote']))
@click.pass_context
def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type,
            ingest_type):
    """Create a collection and ingest items with their bitstreams.

    Loads item metadata from the JSON file at ``metadata``, builds a dict
    mapping file identifiers to file locations (a recursive local glob for
    ``ingest_type='local'``, a scraped remote directory listing for
    ``'remote'``), posts the new collection to the community identified by
    ``comm_handle``, posts each item to it, and logs total runtime.
    """
    client = ctx.obj['client']
    start_time = ctx.obj['start_time']
    with open(metadata, encoding='UTF-8') as fp:
        coll_metadata = json.load(fp)
    coll_id = client.post_coll_to_comm(comm_handle, coll_name)
    file_dict = {}
    if ingest_type == 'local':
        files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
        for file in files:
            # Key on the bare file name (no directory, no extension) so it
            # can be matched against each item's file_identifier.
            file_name = os.path.splitext(os.path.basename(file))[0]
            file_dict[file_name] = file
    elif ingest_type == 'remote':
        file_dict = models.build_file_dict_remote(file_path, file_type,
                                                  file_dict)
    items = client.post_items_to_coll(coll_id, coll_metadata, file_dict,
                                      ingest_type)
    for item in items:
        logger.info(f'Item posted: {item}')
    models.elapsed_time(start_time, 'Total runtime:')


if __name__ == '__main__':
Expand Down
68 changes: 58 additions & 10 deletions dsaps/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
from functools import partial
import operator
import os
import requests
import time

Expand All @@ -24,6 +25,7 @@ def __init__(self, url):
logger.info('Initializing client')

def authenticate(self, email, password):
"""Authenticate user to DSpace API."""
header = self.header
data = {'email': email, 'password': password}
session = requests.post(f'{self.url}/login', headers=header,
Expand Down Expand Up @@ -54,6 +56,7 @@ def get_record(self, uuid, rec_type):

def filtered_item_search(self, key, string, query_type,
selected_collections=''):
"""Performs a search against the filtered items endpoint."""
offset = 0
items = ''
item_links = []
Expand All @@ -63,10 +66,9 @@ def filtered_item_search(self, key, string, query_type,
'query_val[]': string, '&collSel[]':
selected_collections, 'limit': 200, 'offset': offset}
logger.info(params)
print(endpoint)
response = requests.get(endpoint, headers=self.header,
params=params, cookies=self.cookies)
print(f'Response url: {response.url}')
logger.info(f'Response url: {response.url}')
response = response.json()
items = response['items']
for item in items:
Expand All @@ -75,6 +77,7 @@ def filtered_item_search(self, key, string, query_type,
return item_links

def post_coll_to_comm(self, comm_handle, coll_name):
"""Posts a collection to a specified community."""
endpoint = f'{self.url}/handle/{comm_handle}'
community = requests.get(endpoint, headers=self.header,
cookies=self.cookies).json()
Expand All @@ -83,7 +86,52 @@ def post_coll_to_comm(self, comm_handle, coll_name):
endpoint2 = f'{self.url}/communities/{comm_id}/collections'
coll_id = requests.post(endpoint2, headers=self.header,
cookies=self.cookies, json=collection).json()
return coll_id['link']
coll_id = coll_id['uuid']
logger.info(f'Collection posted: {coll_id}')
return coll_id

def post_items_to_coll(self, coll_id, coll_metadata, file_dict,
                       ingest_type):
    """Post items to a specified collection.

    For each item metadata record, extracts the ``file_identifier``
    entry (removing it from the metadata, since it is ingest
    bookkeeping rather than descriptive metadata to post), and posts
    the item only when a matching file exists in ``file_dict``.
    Yields the uuid of each posted item.
    """
    for item_metadata in coll_metadata:
        file_identifier = None
        # Iterate over a copy so removal from the live list is safe.
        for element in [e for e in item_metadata['metadata']
                        if e['key'] == 'file_identifier']:
            file_identifier = element['value']
            item_metadata['metadata'].remove(element)
        if file_identifier is None:
            # No file_identifier in the metadata record: skip it.
            # (Previously this raised NameError on the lookup below.)
            continue
        file_exists = any(file_identifier in k for k in file_dict)
        if file_exists:
            endpoint = f'{self.url}/collections/{coll_id}/items'
            item_id = requests.post(endpoint, headers=self.header,
                                    cookies=self.cookies,
                                    json=item_metadata).json()
            item_id = item_id['uuid']
            bit_ids = self.post_bitstreams_to_item(item_id,
                                                   file_identifier,
                                                   file_dict, ingest_type)
            for bit_id in bit_ids:
                logger.info(f'Bitstream posted: {bit_id}')
            yield item_id

def post_bitstreams_to_item(self, item_id, file_identifier, file_dict,
                            ingest_type):
    """Post bitstreams to a specified item.

    Uploads every file in ``file_dict`` whose key starts with
    ``file_identifier`` and yields the uuid of each posted bitstream.
    """
    for file_key, file_location in file_dict.items():
        if not file_key.startswith(file_identifier):
            continue
        file_name = os.path.basename(file_location)
        if ingest_type == 'local':
            # Read the file eagerly so the handle is closed promptly
            # (the previous open() was never closed — a handle leak).
            with open(file_location, 'rb') as fp:
                data = fp.read()
        elif ingest_type == 'remote':
            # A requests.Response is iterable, so it can be passed
            # directly as the request body.
            data = requests.get(file_location)
        endpoint = (f'{self.url}/items/{item_id}'
                    + f'/bitstreams?name={file_name}')
        header_upload = {'accept': 'application/json'}
        bit_id = requests.post(endpoint, headers=header_upload,
                               cookies=self.cookies, data=data).json()
        bit_id = bit_id['uuid']
        yield bit_id

def _pop_inst(self, class_type, rec_obj):
"""Populate class instance with data from record."""
Expand All @@ -100,6 +148,7 @@ def _pop_inst(self, class_type, rec_obj):
return rec_obj

def _build_uuid_list(self, rec_obj, children):
"""Builds a list of the uuids for an object's children."""
child_list = []
for child in rec_obj[children]:
child_list.append(child['uuid'])
Expand Down Expand Up @@ -138,15 +187,14 @@ class MetadataEntry(BaseRecord):
language = Field()


def build_file_dict_remote(directory_url, file_type, file_dict):
    """Build a dict of files found in a remote directory listing.

    Scrapes the HTML page at ``directory_url`` for links whose target
    ends with ``file_type`` and adds each to ``file_dict``, keyed by the
    link text with its ``.file_type`` extension removed and mapped to the
    full URL. The (mutated) ``file_dict`` is returned.
    """
    response = requests.get(directory_url)
    links = html.fromstring(response.content).iterlinks()
    for link in links:
        file_name = link[2]
        if not file_name.endswith(file_type):
            continue
        # NOTE(review): replace() strips every '.{file_type}' occurrence,
        # not just a trailing extension — fine for simple names like
        # '999.pdf'; confirm no identifiers embed the extension.
        file_identifier = file_name.replace(f'.{file_type}', '')
        file_dict[file_identifier] = f'{directory_url}{file_name}'
    return file_dict


def elapsed_time(start_time, label):
Expand Down
65 changes: 58 additions & 7 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ def client():
return client


@pytest.fixture
def sample_content(tmp_path):
    """Create a sample PDF file in a temp subdirectory for ingest tests."""
    # Renamed from 'dir', which shadowed the builtin of the same name.
    sub_dir = tmp_path / 'sub'
    sub_dir.mkdir()
    sample_file = sub_dir / '123.pdf'
    sample_file.write_text('test')
    return sample_file


def test_authenticate(client):
"""Test authenticate function."""
with requests_mock.Mocker() as m:
Expand Down Expand Up @@ -55,18 +65,58 @@ def test_filtered_item_search(client):


def test_post_coll_to_comm(client):
    """Test post_coll_to_comm function."""
    with requests_mock.Mocker() as m:
        comm_handle = '1234'
        coll_name = 'Test Collection'
        json_object_1 = {'uuid': 'a1b2'}
        # post_coll_to_comm now returns the collection uuid, not 'link'.
        json_object_2 = {'uuid': '5678'}
        m.get('mock://example.com/handle/1234', json=json_object_1)
        m.post('mock://example.com/communities/a1b2/collections',
               json=json_object_2)
        coll_id = client.post_coll_to_comm(comm_handle, coll_name)
        assert coll_id == '5678'


def test_post_items_to_coll(client, sample_content):
    """Test post_items_to_coll function."""
    with requests_mock.Mocker() as m:
        coll_metadata = [{"metadata": [
            {"key": "file_identifier",
             "value": "123"},
            {"key": "dc.title", "value":
             "Monitoring Works: Getting Teachers",
             "language": "en_US"}]}]
        coll_id = '789'
        ingest_type = 'local'
        file_dict = {'123': sample_content}
        m.post('mock://example.com/collections/789/items',
               json={'uuid': 'a1b2'})
        m.post('mock://example.com/items/a1b2/bitstreams?name=123.pdf',
               json={'uuid': 'c3d4'})
        item_ids = client.post_items_to_coll(coll_id, coll_metadata,
                                             file_dict, ingest_type)
        # Materialize the generator: asserting inside a for-loop would
        # pass vacuously if no items were posted.
        assert list(item_ids) == ['a1b2']


def test_post_bitstreams_to_item(client, sample_content):
    """Test post_bitstreams_to_item function."""
    with requests_mock.Mocker() as m:
        item_id = 'a1b2'
        ingest_type = 'local'
        file_identifier = '123'
        file_dict = {'123': sample_content}
        url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf'
        m.post(url, json={'uuid': 'c3d4'})
        bit_ids = client.post_bitstreams_to_item(item_id, file_identifier,
                                                 file_dict, ingest_type)
        # Materialize the generator: asserting inside a for-loop would
        # pass vacuously if no bitstreams were posted.
        assert list(bit_ids) == ['c3d4']


def test__pop_inst(client):
"""Test _pop_inst function."""
class_type = models.Collection
Expand All @@ -84,17 +134,18 @@ def test__build_uuid_list(client):
assert '1234' in child_list


def test_build_file_dict_remote():
    """Test build_file_dict_remote function."""
    content = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"><html>'
    content += '<head><title>Index of /pdf</title></head><body><h1>Index of /'
    content += 'pdf</h1><table><tr><th>Name</th><th>Last modified</th><th>'
    content += 'Size</th></tr><tr><td><a href="999.pdf">999.pdf</a></td><td>'
    content += '2001-02-16 11:59 </td><td>107K</td></tr></table></body></html>'
    with requests_mock.Mocker() as m:
        directory_url = 'mock://test.com/pdfs/'
        file_type = 'pdf'
        m.get(directory_url, text=content)
        file_dict = models.build_file_dict_remote(directory_url, file_type,
                                                  {})
        # Keys are extension-less identifiers mapped to full URLs.
        assert '999' in file_dict
        assert file_dict['999'] == 'mock://test.com/pdfs/999.pdf'