From 62520ffd9f6fe67855051786370208f09de9fefc Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Sun, 19 Jan 2020 15:41:52 -0500 Subject: [PATCH 1/2] metadata json transform --- dsaps/cli.py | 45 ++++++++++++++++++++++++++++++++++++++++++++ dsaps/models.py | 36 +++++++++++++++++++++++++++++++++++ tests/test_models.py | 18 ++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/dsaps/cli.py b/dsaps/cli.py index cfc4ad0..60478a1 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -145,5 +145,50 @@ def reconcile(metadata_csv, file_path, file_type): models.create_csv_from_list(metadata_matches, 'metadata_matches.csv') +@main.command() +@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', + help='The path of the CSV file of metadata.') +def metadatajson(metadata_csv): + with open(metadata_csv) as csvfile: + reader = csv.DictReader(csvfile) + metadata_group = [] + for row in reader: + metadata_rec = [] + models.metadata_csv(row, metadata_rec, 'fileIdentifier', + 'file_identifier', '', '') + models.metadata_csv(row, metadata_rec, 'dc.contributor.author', + 'author name - direct', '', '') + models.metadata_csv(row, metadata_rec, 'dc.contributor.advisor', + 'supervisor(s)', '', '') + models.metadata_csv(row, metadata_rec, 'dc.date.issued', + 'pub date', '', '') + models.metadata_csv(row, metadata_rec, 'dc.description.abstract', + 'Abstract', 'en_US', '') + models.metadata_direct(metadata_rec, 'dc.format.mimetype', + 'application/pdf', 'en_US') + models.metadata_direct(metadata_rec, 'dc.language.iso', 'en_US', + 'en_US') + models.metadata_direct(metadata_rec, 'dc.publisher', + 'Massachusetts Institute of Technology. 
' + 'Laboratory for Computer Science', 'en_US') + models.metadata_csv(row, metadata_rec, + 'dc.relation.ispartofseries', + 'file_identifier', 'en_US', '') + models.metadata_direct(metadata_rec, 'dc.rights', + 'Educational use permitted', 'en_US') + models.metadata_direct(metadata_rec, 'dc.rights.uri', + 'http://rightsstatements.org/vocab/' + 'InC-EDU/1.0/', 'en_US') + models.metadata_csv(row, metadata_rec, 'dc.title', 'Title', + 'en_US', '') + models.metadata_direct(metadata_rec, 'dc.type', 'Technical Report', + 'en_US') + item = {'metadata': metadata_rec} + metadata_group.append(item) + file_name = os.path.splitext(os.path.basename(metadata_csv))[0] + f = open(f'{file_name}.json', 'w') + json.dump(metadata_group, f) + + if __name__ == '__main__': main() diff --git a/dsaps/models.py b/dsaps/models.py index 8c13020..fdd4f76 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -211,3 +211,39 @@ def elapsed_time(start_time, label): """Calculate elapsed time.""" td = datetime.timedelta(seconds=time.time() - start_time) logger.info(f'{label} : {td}') + + +def metadata_csv(row, metadata_rec, key, field, language, delimiter): + """Create metadata elements from CSV, including fields with delimiters.""" + if row[field] != '': + if delimiter != '' and delimiter in row[field]: + values = row[field].split(delimiter) + for value in values: + if language != '': + metadata_elem = {'key': key, 'language': language, 'value': + value} + metadata_rec.append(metadata_elem) + else: + metadata_elem = {'key': key, 'value': value} + metadata_rec.append(metadata_elem) + else: + value = row[field] + if language != '': + metadata_elem = {'key': key, 'language': language, 'value': + value} + metadata_rec.append(metadata_elem) + else: + metadata_elem = {'key': key, 'value': value} + metadata_rec.append(metadata_elem) + else: + pass + + +def metadata_direct(metadata_rec, key, value, language): + """Create metadata element with specified value.""" + if language != '': + metadata_elem = {'key': 
key, 'language': language, 'value': value} + metadata_rec.append(metadata_elem) + else: + metadata_elem = {'key': key, 'value': value} + metadata_rec.append(metadata_elem) diff --git a/tests/test_models.py b/tests/test_models.py index 5bc8b49..c06f4a4 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -155,3 +155,21 @@ def test_build_file_dict_remote(): # def test_create_csv_from_list(): # """Test create_csv_from_list function.""" # assert False + + +def test_metadata_csv(): + """Test metadata_csv function.""" + metadata_rec = [] + row = {'title': 'Test title'} + models.metadata_csv(row, metadata_rec, 'dc.title', 'title', 'en_US', '') + assert metadata_rec[0]['key'] == 'dc.title' + assert metadata_rec[0]['value'] == 'Test title' + + +def test_metadata_direct(): + """Test metadata_direct function.""" + metadata_rec = [] + value = 'No one may ever view this content.' + models.metadata_direct(metadata_rec, 'dc.rights', value, 'en_US') + assert metadata_rec[0]['key'] == 'dc.rights' + assert metadata_rec[0]['value'] == 'No one may ever view this content.' 
From 3362540d58fbaf07be27d998f0c52642deb845e6 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Mon, 2 Mar 2020 11:30:50 -0500 Subject: [PATCH 2/2] PR updates --- dsaps/cli.py | 70 +++++++++++++++++++++++--------------------- dsaps/models.py | 49 ++++++++++++------------------- tests/test_models.py | 19 ++++++------ 3 files changed, 63 insertions(+), 75 deletions(-) diff --git a/dsaps/cli.py b/dsaps/cli.py index 60478a1..9881ed2 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -47,6 +47,11 @@ def main(ctx, url, email, password): ctx.obj['start_time'] = start_time +@click.group() +def aux(): + pass + + @main.command() @click.option('-f', '--field', prompt='Enter the field to be searched', help='The field to search.') @@ -106,7 +111,7 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type, models.elapsed_time(start_time, 'Total runtime:') -@main.command() +@aux.command() @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', help='The path of the CSV file of metadata.') @click.option('-f', '--file_path', prompt='Enter the path', @@ -145,50 +150,47 @@ def reconcile(metadata_csv, file_path, file_type): models.create_csv_from_list(metadata_matches, 'metadata_matches.csv') -@main.command() +@aux.command() @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', help='The path of the CSV file of metadata.') def metadatajson(metadata_csv): with open(metadata_csv) as csvfile: reader = csv.DictReader(csvfile) metadata_group = [] + mapping_dict = {'fileIdentifier': ['file_identifier'], + 'dc.contributor.author': ['author name - direct'], + 'dc.contributor.advisor': ['supervisor(s)'], + 'dc.date.issued': ['pub date'], + 'dc.description.abstract': ['Abstract', 'en_US'], + 'dc.title': ['Title', 'en_US'], + 'dc.relation.ispartofseries': ['file_identifier']} for row in reader: metadata_rec = [] - models.metadata_csv(row, metadata_rec, 'fileIdentifier', - 'file_identifier', '', '') - models.metadata_csv(row, metadata_rec, 
'dc.contributor.author', - 'author name - direct', '', '') - models.metadata_csv(row, metadata_rec, 'dc.contributor.advisor', - 'supervisor(s)', '', '') - models.metadata_csv(row, metadata_rec, 'dc.date.issued', - 'pub date', '', '') - models.metadata_csv(row, metadata_rec, 'dc.description.abstract', - 'Abstract', 'en_US', '') - models.metadata_direct(metadata_rec, 'dc.format.mimetype', - 'application/pdf', 'en_US') - models.metadata_direct(metadata_rec, 'dc.language.iso', 'en_US', - 'en_US') - models.metadata_direct(metadata_rec, 'dc.publisher', - 'Massachusetts Institute of Technology. ' - 'Laboratory for Computer Science', 'en_US') - models.metadata_csv(row, metadata_rec, - 'dc.relation.ispartofseries', - 'file_identifier', 'en_US', '') - models.metadata_direct(metadata_rec, 'dc.rights', - 'Educational use permitted', 'en_US') - models.metadata_direct(metadata_rec, 'dc.rights.uri', - 'http://rightsstatements.org/vocab/' - 'InC-EDU/1.0/', 'en_US') - models.metadata_csv(row, metadata_rec, 'dc.title', 'Title', - 'en_US', '') - models.metadata_direct(metadata_rec, 'dc.type', 'Technical Report', - 'en_US') + metadata_rec = models.create_metadata_rec(mapping_dict, row, + metadata_rec) + metadata_rec.append({'key': 'dc.format.mimetype', 'language': + 'en_US', 'value': 'application/pdf'}) + metadata_rec.append({'key': 'dc.language.iso', 'language': + 'en_US', 'value': 'en_US'}) + metadata_rec.append({'key': 'dc.publisher', 'language': 'en_US', + 'value': 'Massachusetts Institute of ' + 'Technology. 
Laboratory for Computer ' + 'Science'}) + metadata_rec.append({'key': 'dc.rights', 'language': 'en_US', + 'value': 'Educational use permitted'}) + metadata_rec.append({'key': 'dc.rights.uri', 'language': 'en_US', + 'value': 'http://rightsstatements.org/vocab/' + 'InC-EDU/1.0/'}) + metadata_rec.append({'key': 'dc.type', 'language': 'en_US', + 'value': 'Technical Report'}) item = {'metadata': metadata_rec} metadata_group.append(item) file_name = os.path.splitext(os.path.basename(metadata_csv))[0] - f = open(f'{file_name}.json', 'w') - json.dump(metadata_group, f) + with open(f'{file_name}.json', 'w') as f: + json.dump(metadata_group, f) + +cli = click.CommandCollection(sources=[main, aux]) if __name__ == '__main__': - main() + cli() diff --git a/dsaps/models.py b/dsaps/models.py index fdd4f76..aabc523 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -213,37 +213,24 @@ def elapsed_time(start_time, label): logger.info(f'{label} : {td}') -def metadata_csv(row, metadata_rec, key, field, language, delimiter): - """Create metadata elements from CSV, including fields with delimiters.""" - if row[field] != '': - if delimiter != '' and delimiter in row[field]: - values = row[field].split(delimiter) - for value in values: - if language != '': - metadata_elem = {'key': key, 'language': language, 'value': - value} - metadata_rec.append(metadata_elem) - else: - metadata_elem = {'key': key, 'value': value} - metadata_rec.append(metadata_elem) - else: - value = row[field] - if language != '': - metadata_elem = {'key': key, 'language': language, 'value': - value} - metadata_rec.append(metadata_elem) - else: - metadata_elem = {'key': key, 'value': value} - metadata_rec.append(metadata_elem) +def metadata_csv(row, key, field, language=None): + """Create metadata element from CSV.""" + value = row[field] + if language is not None: + metadata_elem = {'key': key, 'language': language, 'value': + value} else: - pass + metadata_elem = {'key': key, 'value': value} + return metadata_elem 
-def metadata_direct(metadata_rec, key, value, language): - """Create metadata element with specified value.""" - if language != '': - metadata_elem = {'key': key, 'language': language, 'value': value} - metadata_rec.append(metadata_elem) - else: - metadata_elem = {'key': key, 'value': value} - metadata_rec.append(metadata_elem) +def create_metadata_rec(mapping_dict, row, metadata_rec): + """Create metadata record from CSV.""" + for k, v in mapping_dict.items(): + if len(v) == 2: + metadata_elem = metadata_csv(row, k, v[0], v[1]) + else: + metadata_elem = metadata_csv(row, k, v[0]) + if metadata_elem['value'] != '': + metadata_rec.append(metadata_elem) + return metadata_rec diff --git a/tests/test_models.py b/tests/test_models.py index c06f4a4..a61e813 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -159,17 +159,16 @@ def test_build_file_dict_remote(): def test_metadata_csv(): """Test metadata_csv function.""" - metadata_rec = [] row = {'title': 'Test title'} - models.metadata_csv(row, metadata_rec, 'dc.title', 'title', 'en_US', '') - assert metadata_rec[0]['key'] == 'dc.title' - assert metadata_rec[0]['value'] == 'Test title' + metadata_elem = models.metadata_csv(row, 'dc.title', 'title', 'en_US') + assert metadata_elem['key'] == 'dc.title' + assert metadata_elem['value'] == 'Test title' -def test_metadata_direct(): - """Test metadata_direct function.""" +def test_create_metadata_rec(): metadata_rec = [] - value = 'No one may ever view this content.' - models.metadata_direct(metadata_rec, 'dc.rights', value, 'en_US') - assert metadata_rec[0]['key'] == 'dc.rights' - assert metadata_rec[0]['value'] == 'No one may ever view this content.' + row = {'title': 'Test title'} + mapping_dict = {'dc.title': ['title']} + metadata_rec = models.create_metadata_rec(mapping_dict, row, metadata_rec) + assert metadata_rec[0]['key'] == 'dc.title' + assert metadata_rec[0]['value'] == 'Test title'