diff --git a/.travis.yml b/.travis.yml index 3a2a5d7..4b1d45a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: python python: - - "3.7" + - "3.8" install: - pipenv install --dev script: diff --git a/Pipfile b/Pipfile index 1da3184..c02ecb3 100644 --- a/Pipfile +++ b/Pipfile @@ -10,12 +10,12 @@ requests-mock = "*" [packages] requests = "*" structlog = "*" -attr = "*" +attrs = "*" click = "*" lxml = "*" [requires] -python_version = "3.7" +python_version = "3.8" [scripts] dsaps = "python -c \"from dsaps.cli import main; main()\"" diff --git a/dsaps/cli.py b/dsaps/cli.py index 9881ed2..92a7145 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -24,6 +24,8 @@ @click.pass_context def main(ctx, url, email, password): ctx.obj = {} + if os.path.isdir('logs') is False: + os.mkdir('logs') dt = datetime.datetime.utcnow().isoformat(timespec='seconds') log_suffix = f'{dt}.log' structlog.configure(processors=[ @@ -47,31 +49,6 @@ def main(ctx, url, email, password): ctx.obj['start_time'] = start_time -@click.group() -def aux(): - pass - - -@main.command() -@click.option('-f', '--field', prompt='Enter the field to be searched', - help='The field to search.') -@click.option('-s', '--string', prompt='Enter the string', - help='The field to search.') -@click.option('-t', '--search_type', prompt='Enter the type of search', - help='The type of search.', - type=click.Choice(['exists', 'doesnt_exist', 'equals', - 'not_equals', 'contains', 'doesnt_contain']), - default='contains') -@click.pass_context -def search(ctx, field, string, search_type): - # Temp function for testing - client = ctx.obj['client'] - start_time = ctx.obj['start_time'] - item_links = client.filtered_item_search(field, string, search_type) - logger.info(item_links) - models.elapsed_time(start_time, 'Elapsed time') - - @main.command() @click.option('-c', '--comm_handle', prompt='Enter the community handle', help='The handle of the community in which to create the ,' @@ -111,7 +88,7 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type, models.elapsed_time(start_time, 'Total runtime:') -@aux.command() +@main.command() @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', help='The path of the CSV file of metadata.') @click.option('-f', '--file_path', prompt='Enter the path', @@ -150,7 +127,7 @@ def reconcile(metadata_csv, file_path, file_type): models.create_csv_from_list(metadata_matches, 'metadata_matches.csv') -@aux.command() +@main.command() @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', help='The path of the CSV file of metadata.') def metadatajson(metadata_csv): @@ -190,7 +167,5 @@ def metadatajson(metadata_csv): json.dump(metadata_group, f) -cli = click.CommandCollection(sources=[main, aux]) - if __name__ == '__main__': - cli() + main() diff --git a/dsaps/models.py b/dsaps/models.py index aabc523..7f0b400 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -1,3 +1,4 @@ +import collections import csv import datetime from functools import partial @@ -77,6 +78,13 @@ def filtered_item_search(self, key, string, query_type, offset = offset + 200 return item_links + def get_id_from_handle(self, handle): + """Retrieves UUID for an object based on its handle.""" + endpoint = f'{self.url}/handle/{handle}' + rec_obj = requests.get(endpoint, headers=self.header, + cookies=self.cookies).json() + return rec_obj['uuid'] + def post_coll_to_comm(self, comm_handle, coll_name): """Posts a collection to a specified community.""" endpoint = f'{self.url}/handle/{comm_handle}' @@ -117,22 +125,29 @@ def post_items_to_coll(self, coll_id, coll_metadata, file_dict, def post_bitstreams_to_item(self, item_id, file_identifier, file_dict, ingest_type): - """Posts bitstreams to a specified item.""" - for k, v in file_dict.items(): - if k.startswith(file_identifier): - bitstream = file_dict[k] - file_name = os.path.basename(bitstream) - if ingest_type == 'local': - data = open(bitstream, 'rb') - elif ingest_type == 'remote': - data = requests.get(bitstream) - endpoint = (f'{self.url}/items/{item_id}' - + f'/bitstreams?name={file_name}') - header_upload = {'accept': 'application/json'} - bit_id = requests.post(endpoint, headers=header_upload, - cookies=self.cookies, data=data).json() - bit_id = bit_id['uuid'] - yield bit_id + """Post a sorted set of bitstreams to a specified item.""" + file_dict = collections.OrderedDict(sorted(file_dict.items())) + for bitstream, v in file_dict.items(): + bit_id = self.post_bitstream(item_id, file_identifier, file_dict, + ingest_type, bitstream) + yield bit_id + + def post_bitstream(self, item_id, file_identifier, file_dict, ingest_type, + bitstream): + """Post a bitstream to a specified item.""" + bitstream_path = file_dict[bitstream] + file_name = os.path.basename(bitstream_path) + if ingest_type == 'local': + data = open(bitstream_path, 'rb') + elif ingest_type == 'remote': + data = requests.get(bitstream_path) + endpoint = (f'{self.url}/items/{item_id}' + + f'/bitstreams?name={file_name}') + header_upload = {'accept': 'application/json'} + bit_id = requests.post(endpoint, headers=header_upload, + cookies=self.cookies, data=data).json() + bit_id = bit_id['uuid'] + return bit_id def _pop_inst(self, class_type, rec_obj): """Populate class instance with data from record.""" @@ -192,7 +207,7 @@ def build_file_dict_remote(directory_url, file_type, file_dict): """Build list of files in a remote directory.""" response = requests.get(directory_url) links = html.fromstring(response.content).iterlinks() - for link in [l for l in links if l[2].endswith(file_type)]: + for link in [i for i in links if i[2].endswith(file_type)]: file_identifier = link[2].replace(f'.{file_type}', '') file_dict[file_identifier] = f'{directory_url}{link[2]}' return file_dict @@ -213,24 +228,29 @@ def elapsed_time(start_time, label): logger.info(f'{label} : {td}') -def metadata_csv(row, key, field, language=None): - """Create metadata element from CSV.""" - value = row[field] - if language is not None: - metadata_elem = {'key': key, 'language': language, 'value': - value} - else: - metadata_elem = {'key': key, 'value': value} - return metadata_elem +def metadata_elems_from_row(row, key, field, language=None, delimiter=''): + """Create a metadata element from a CSV row.""" + metadata_elems = [] + if row[field] != '': + if delimiter: + values = row[field].split(delimiter) + else: + values = [row[field]] + for value in values: + metadata_elem = {'key': key, 'language': language, 'value': + value} + metadata_elems.append({k: v for k, v in metadata_elem.items() + if v is not None}) + return metadata_elems def create_metadata_rec(mapping_dict, row, metadata_rec): - """Create metadata record from CSV.""" + """Create metadata record from a series of metadata elements.""" for k, v in mapping_dict.items(): - if len(v) == 2: - metadata_elem = metadata_csv(row, k, v[0], v[1]) + if len(v) == 3: + metadata_elems = metadata_elems_from_row(row, k, v[0], v[1], v[2]) else: - metadata_elem = metadata_csv(row, k, v[0]) - if metadata_elem['value'] != '': + metadata_elems = metadata_elems_from_row(row, k, v[0]) + for metadata_elem in metadata_elems: metadata_rec.append(metadata_elem) return metadata_rec diff --git a/setup.py b/setup.py index 15bcbdf..33de900 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages setup( - name='DSpace API Python Scripts', + name='dsaps', version='1.0.0', description='', packages=find_packages(exclude=['tests']), @@ -10,7 +10,7 @@ install_requires=[ 'requests', 'structlog', - 'attr', + 'attrs', 'click', 'lxml', ], @@ -19,5 +19,5 @@ 'dsaps=dsaps.cli:main', ] }, - python_requires='>=3.7.1', + python_requires='>=3.8', ) diff --git a/tests/test_models.py b/tests/test_models.py index a61e813..49224cb 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -15,11 +15,20 @@ def client(): @pytest.fixture -def sample_content(tmp_path): +def sample_content_1(tmp_path): content = 'test' dir = tmp_path / 'sub' dir.mkdir() - sample_content = dir / '123.pdf' + sample_content = dir / '123_1.pdf' + sample_content.write_text(content) + return sample_content + + +@pytest.fixture +def sample_content_2(tmp_path): + content = 'test' + dir = tmp_path / 'sub' + sample_content = dir / '123_2.pdf' sample_content.write_text(content) return sample_content @@ -30,9 +39,9 @@ def test_authenticate(client): email = 'test@test.mock' password = '1234' cookies = {'JSESSIONID': '11111111'} - json_object = {'fullname': 'User Name'} + user_json = {'fullname': 'User Name'} m.post('mock://example.com/login', cookies=cookies) - m.get('mock://example.com/status', json=json_object) + m.get('mock://example.com/status', json=user_json) client.authenticate(email, password) assert client.user_full_name == 'User Name' assert client.cookies == cookies @@ -42,10 +51,10 @@ def test_get_record(client): """Test get_record method.""" with requests_mock.Mocker() as m: uri = 'mock://example.com/items/123?expand=all' - json_object = {'metadata': {'title': 'Sample title'}, 'type': 'item'} - m.get(uri, json=json_object) + rec_json = {'metadata': {'title': 'Sample title'}, 'type': 'item'} + m.get(uri, json=rec_json) rec_obj = client.get_record('123', 'items') - assert attr.asdict(rec_obj)['metadata'] == json_object['metadata'] + assert attr.asdict(rec_obj)['metadata'] == rec_json['metadata'] def test_filtered_item_search(client): @@ -55,30 +64,40 @@ def test_filtered_item_search(client): string = 'test' query_type = 'contains' endpoint = 'mock://example.com/filtered-items?' - json_object_1 = {'items': [{'link': '1234'}]} - json_object_2 = {'items': []} - m.get(endpoint, [{'json': json_object_1}, {'json': json_object_2}]) + results_json1 = {'items': [{'link': '1234'}]} + results_json2 = {'items': []} + m.get(endpoint, [{'json': results_json1}, {'json': results_json2}]) item_links = client.filtered_item_search(key, string, query_type, selected_collections='') assert '1234' in item_links +def test_get_id_from_handle(client): + """Test get_id_from_handle method.""" + with requests_mock.Mocker() as m: + handle = 'mock://example.com/handle/111.1111' + rec_json = {'uuid': '123'} + m.get(handle, json=rec_json) + id = client.get_id_from_handle('111.1111') + assert id == '123' + + def test_post_coll_to_comm(client): """Test post_coll_to_comm method.""" with requests_mock.Mocker() as m: comm_handle = '1234' coll_name = 'Test Collection' - json_object_1 = {'uuid': 'a1b2'} - json_object_2 = {'uuid': '5678'} - m.get('mock://example.com/handle/1234', json=json_object_1) + comm_json = {'uuid': 'a1b2'} + coll_json = {'uuid': '5678'} + m.get('mock://example.com/handle/1234', json=comm_json) m.post('mock://example.com/communities/a1b2/collections', - json=json_object_2) + json=coll_json) coll_id = client.post_coll_to_comm(comm_handle, coll_name) assert coll_id == '5678' -def test_post_items_to_coll(client, sample_content): +def test_post_items_to_coll(client, sample_content_1): """Test post_items_to_coll method.""" with requests_mock.Mocker() as m: coll_metadata = [{"metadata": [ @@ -86,35 +105,59 @@ def test_post_items_to_coll(client, sample_content): "value": "123"}, {"key": "dc.title", "value": "Monitoring Works: Getting Teachers", - "language": "en_US"}]}] + "language": "en_US"}, + {"key": "dc.relation.isversionof", + "value": "repo/0/ao/123"}]}] coll_id = '789' ingest_type = 'local' - file_dict = {'123': sample_content} - json_object_1 = {'uuid': 'a1b2'} - m.post('mock://example.com/collections/789/items', json=json_object_1) - url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf' - json_object_2 = {'uuid': 'c3d4'} - m.post(url, json=json_object_2) + file_dict = {'123': sample_content_1} + item_json = {'uuid': 'a1b2', 'handle': '1111.1/1111'} + m.post('mock://example.com/collections/789/items', json=item_json) + url = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' + b_json = {'uuid': 'c3d4'} + m.post(url, json=b_json) item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict, ingest_type) for item_id in item_ids: assert 'a1b2' == item_id -def test_post_bitstreams_to_item(client, sample_content): +def test_post_bitstreams_to_item(client, sample_content_1, sample_content_2): """Test post_bitstreams_to_item method.""" with requests_mock.Mocker() as m: item_id = 'a1b2' ingest_type = 'local' file_identifier = '123' - file_dict = {'123': sample_content} - json_object_1 = {'uuid': 'c3d4'} - url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf' - m.post(url, json=json_object_1) + file_dict = {'123_2': sample_content_2, '123_1': sample_content_1} + b_json_1 = {'uuid': 'c3d4'} + url_1 = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' + m.post(url_1, json=b_json_1) + b_json_2 = {'uuid': 'e5f6'} + url_2 = 'mock://example.com/items/a1b2/bitstreams?name=123_2.pdf' + m.post(url_2, json=b_json_2) bit_ids = client.post_bitstreams_to_item(item_id, file_identifier, file_dict, ingest_type) + bit_ids_output = [] for bit_id in bit_ids: - assert 'c3d4' == bit_id + bit_ids_output.append(bit_id) + assert bit_ids_output[0] == 'c3d4' + assert bit_ids_output[1] == 'e5f6' + + +def test_post_bitstream(client, sample_content_1): + """Test post_bitstream method.""" + with requests_mock.Mocker() as m: + item_id = 'a1b2' + ingest_type = 'local' + file_identifier = '123' + file_dict = {'123': sample_content_1} + b_json = {'uuid': 'c3d4'} + url = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' + bitstream = '123' + m.post(url, json=b_json) + bit_id = client.post_bitstream(item_id, file_identifier, file_dict, + ingest_type, bitstream) + assert 'c3d4' == bit_id def test__pop_inst(client): @@ -157,12 +200,34 @@ def test_build_file_dict_remote(): # assert False -def test_metadata_csv(): - """Test metadata_csv function.""" +def test_metadata_elems_from_row(): + """Test metadata_elems_from_row function.""" row = {'title': 'Test title'} - metadata_elem = models.metadata_csv(row, 'dc.title', 'title', 'en_US') - assert metadata_elem['key'] == 'dc.title' - assert metadata_elem['value'] == 'Test title' + metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title', + 'en_US') + assert metadata_elem[0]['key'] == 'dc.title' + assert metadata_elem[0]['value'] == 'Test title' + assert metadata_elem[0]['language'] == 'en_US' + metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title') + assert metadata_elem[0]['key'] == 'dc.title' + assert metadata_elem[0]['value'] == 'Test title' + assert 'language' not in metadata_elem[0] + row = {'title': ''} + metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title') + assert metadata_elem == [] + row = {'title': 'Test title 1|Test title 2'} + metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title', + 'en_US', '|') + assert metadata_elem[0]['key'] == 'dc.title' + assert metadata_elem[0]['value'] == 'Test title 1' + assert metadata_elem[0]['language'] == 'en_US' + assert metadata_elem[1]['key'] == 'dc.title' + assert metadata_elem[1]['value'] == 'Test title 2' + assert metadata_elem[1]['language'] == 'en_US' + + +# def test_create_ingest_report(): +# assert False def test_create_metadata_rec():