Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions dsaps/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
import datetime
import glob
import json
Expand Down Expand Up @@ -105,5 +106,44 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type,
models.elapsed_time(start_time, 'Total runtime:')


@main.command()
@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
              help='The path of the CSV file of metadata.')
@click.option('-f', '--file_path', prompt='Enter the path',
              help='The path of the content, a URL or local drive path.')
@click.option('-t', '--file_type', prompt='Enter the file type',
              help='The file type to be uploaded.')
def reconcile(metadata_csv, file_path, file_type):
    """Reconcile the metadata CSV against the files found at file_path.

    Writes three CSV reports to the working directory:
    ``no_metadata.csv`` (files with no metadata row), ``no_files.csv``
    (metadata rows with no file), and ``metadata_matches.csv`` (ids
    present in both).
    """
    # Fix: file_dict was previously uninitialized, so the local-path
    # branch below raised NameError on first assignment.
    file_dict = {}
    if file_path.startswith('http'):
        file_dict = models.build_file_dict_remote(file_path, file_type,
                                                  file_dict)
    else:
        files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
        for file in files:
            # Key on the bare file name (no directory, no extension).
            file_name = os.path.splitext(os.path.basename(file))[0]
            file_dict[file_name] = file
    metadata_ids = []
    with open(metadata_csv) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            metadata_ids.append(row['file_identifier'])
    # Dict membership is O(1), replacing the previous O(n*m) nested
    # loops; order and duplicates of metadata_ids are preserved so the
    # metadata_matches.csv output is unchanged.
    metadata_matches = [m for m in metadata_ids if m in file_dict]
    no_files = set(metadata_ids) - set(metadata_matches)
    no_metadata = set(file_dict) - set(metadata_matches)
    models.create_csv_from_list(no_metadata, 'no_metadata.csv')
    models.create_csv_from_list(no_files, 'no_files.csv')
    models.create_csv_from_list(metadata_matches, 'metadata_matches.csv')


if __name__ == '__main__':
main()
10 changes: 10 additions & 0 deletions dsaps/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
import datetime
from functools import partial
import operator
Expand Down Expand Up @@ -197,6 +198,15 @@ def build_file_dict_remote(directory_url, file_type, file_dict):
return file_dict


def create_csv_from_list(list_name, output):
    """Create a single-column CSV file from an iterable of values.

    Writes a header row ``id`` followed by one row per item.

    Args:
        list_name: Iterable of values to write, one per row.
        output: Path of the CSV file to create (overwritten if present).
    """
    # newline='' is required by the csv module to avoid blank rows on
    # Windows; utf-8 is pinned so output doesn't depend on the locale.
    with open(output, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['id'])
        for item in list_name:
            writer.writerow([item])


def elapsed_time(start_time, label):
"""Calculate elapsed time."""
td = datetime.timedelta(seconds=time.time() - start_time)
Expand Down
22 changes: 14 additions & 8 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def sample_content(tmp_path):


def test_authenticate(client):
"""Test authenticate function."""
"""Test authenticate method."""
with requests_mock.Mocker() as m:
email = 'test@test.mock'
password = '1234'
Expand All @@ -39,7 +39,7 @@ def test_authenticate(client):


def test_get_record(client):
"""Test get_record function."""
"""Test get_record method."""
with requests_mock.Mocker() as m:
uri = 'mock://example.com/items/123?expand=all'
json_object = {'metadata': {'title': 'Sample title'}, 'type': 'item'}
Expand All @@ -49,7 +49,7 @@ def test_get_record(client):


def test_filtered_item_search(client):
"""Test filtered_item_search function."""
"""Test filtered_item_search method."""
with requests_mock.Mocker() as m:
key = 'dc.title'
string = 'test'
Expand All @@ -65,7 +65,7 @@ def test_filtered_item_search(client):


def test_post_coll_to_comm(client):
"""Test post_coll_to_comm function."""
"""Test post_coll_to_comm method."""
with requests_mock.Mocker() as m:
comm_handle = '1234'
coll_name = 'Test Collection'
Expand All @@ -79,7 +79,7 @@ def test_post_coll_to_comm(client):


def test_post_items_to_coll(client, sample_content):
"""Test post_items_to_coll function."""
"""Test post_items_to_coll method."""
with requests_mock.Mocker() as m:
coll_metadata = [{"metadata": [
{"key": "file_identifier",
Expand All @@ -102,7 +102,7 @@ def test_post_items_to_coll(client, sample_content):


def test_post_bitstreams_to_item(client, sample_content):
"""Test post_bitstreams_to_item function."""
"""Test post_bitstreams_to_item method."""
with requests_mock.Mocker() as m:
item_id = 'a1b2'
ingest_type = 'local'
Expand All @@ -118,7 +118,7 @@ def test_post_bitstreams_to_item(client, sample_content):


def test__pop_inst(client):
"""Test _pop_inst function."""
"""Test _pop_inst method."""
class_type = models.Collection
rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []}
rec_obj = client._pop_inst(class_type, rec_obj)
Expand All @@ -127,7 +127,7 @@ def test__pop_inst(client):


def test__build_uuid_list(client):
"""Test _build_uuid_list function."""
"""Test _build_uuid_list method."""
rec_obj = {'items': [{'uuid': '1234'}]}
children = 'items'
child_list = client._build_uuid_list(rec_obj, children)
Expand All @@ -149,3 +149,9 @@ def test_build_file_dict_remote():
file_list = models.build_file_dict_remote(directory_url, file_type,
file_dict)
assert '999' in file_list


# # How to test this? Applies to asaps as well
# def test_create_csv_from_list():
# """Test create_csv_from_list function."""
# assert False