diff --git a/dsaps/cli.py b/dsaps/cli.py
index cfc4ad0..9881ed2 100644
--- a/dsaps/cli.py
+++ b/dsaps/cli.py
@@ -47,6 +47,11 @@ def main(ctx, url, email, password):
     ctx.obj['start_time'] = start_time
 
 
+@click.group()
+def aux():
+    """Group the commands that are registered separately from main."""
+
+
 @main.command()
 @click.option('-f', '--field', prompt='Enter the field to be searched',
               help='The field to search.')
@@ -106,7 +111,7 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type,
     models.elapsed_time(start_time, 'Total runtime:')
 
 
-@main.command()
+@aux.command()
 @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
               help='The path of the CSV file of metadata.')
 @click.option('-f', '--file_path', prompt='Enter the path',
@@ -145,5 +150,49 @@ def reconcile(metadata_csv, file_path, file_type):
     models.create_csv_from_list(metadata_matches, 'metadata_matches.csv')
 
 
+@aux.command()
+@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
+              help='The path of the CSV file of metadata.')
+def metadatajson(metadata_csv):
+    """Create a JSON file of metadata records from a metadata CSV file."""
+    # newline='' lets the csv module handle newlines inside quoted fields.
+    with open(metadata_csv, newline='') as csvfile:
+        reader = csv.DictReader(csvfile)
+        metadata_group = []
+        mapping_dict = {'fileIdentifier': ['file_identifier'],
+                        'dc.contributor.author': ['author name - direct'],
+                        'dc.contributor.advisor': ['supervisor(s)'],
+                        'dc.date.issued': ['pub date'],
+                        'dc.description.abstract': ['Abstract', 'en_US'],
+                        'dc.title': ['Title', 'en_US'],
+                        'dc.relation.ispartofseries': ['file_identifier']}
+        for row in reader:
+            metadata_rec = []
+            metadata_rec = models.create_metadata_rec(mapping_dict, row,
+                                                      metadata_rec)
+            metadata_rec.append({'key': 'dc.format.mimetype', 'language':
+                                 'en_US', 'value': 'application/pdf'})
+            metadata_rec.append({'key': 'dc.language.iso', 'language':
+                                 'en_US', 'value': 'en_US'})
+            metadata_rec.append({'key': 'dc.publisher', 'language': 'en_US',
+                                 'value': 'Massachusetts Institute of '
+                                          'Technology. Laboratory for Computer'
+                                          ' Science'})
+            metadata_rec.append({'key': 'dc.rights', 'language': 'en_US',
+                                 'value': 'Educational use permitted'})
+            metadata_rec.append({'key': 'dc.rights.uri', 'language': 'en_US',
+                                 'value': 'http://rightsstatements.org/vocab/'
+                                          'InC-EDU/1.0/'})
+            metadata_rec.append({'key': 'dc.type', 'language': 'en_US',
+                                 'value': 'Technical Report'})
+            item = {'metadata': metadata_rec}
+            metadata_group.append(item)
+        file_name = os.path.splitext(os.path.basename(metadata_csv))[0]
+        with open(f'{file_name}.json', 'w') as f:
+            json.dump(metadata_group, f)
+
+
+cli = click.CommandCollection(sources=[main, aux])
+
 if __name__ == '__main__':
-    main()
+    cli()
diff --git a/dsaps/models.py b/dsaps/models.py
index 8c13020..aabc523 100644
--- a/dsaps/models.py
+++ b/dsaps/models.py
@@ -211,3 +211,36 @@ def elapsed_time(start_time, label):
     """Calculate elapsed time."""
     td = datetime.timedelta(seconds=time.time() - start_time)
     logger.info(f'{label} : {td}')
+
+
+def metadata_csv(row, key, field, language=None):
+    """Create metadata element from CSV.
+
+    Build a dict with 'key' and 'value' entries, where the value is read
+    from the field column of row; a 'language' entry is added only when
+    language is not None. Raises KeyError if row has no such field.
+    """
+    value = row[field]
+    if language is not None:
+        metadata_elem = {'key': key, 'language': language, 'value':
+                         value}
+    else:
+        metadata_elem = {'key': key, 'value': value}
+    return metadata_elem
+
+
+def create_metadata_rec(mapping_dict, row, metadata_rec):
+    """Create metadata record from CSV.
+
+    Each mapping_dict value is a list holding a CSV field name, optionally
+    followed by a language. Elements whose value is an empty string are
+    skipped; the rest are appended to metadata_rec, which is returned.
+    """
+    for k, v in mapping_dict.items():
+        if len(v) == 2:
+            metadata_elem = metadata_csv(row, k, v[0], v[1])
+        else:
+            metadata_elem = metadata_csv(row, k, v[0])
+        if metadata_elem['value'] != '':
+            metadata_rec.append(metadata_elem)
+    return metadata_rec
diff --git a/tests/test_models.py b/tests/test_models.py
index 5bc8b49..a61e813 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -155,3 +155,22 @@ def test_build_file_dict_remote():
 # def test_create_csv_from_list():
 #     """Test create_csv_from_list function."""
 #     assert False
+
+
+def test_metadata_csv():
+    """Test metadata_csv function."""
+    row = {'title': 'Test title'}
+    metadata_elem = models.metadata_csv(row, 'dc.title', 'title', 'en_US')
+    assert metadata_elem['key'] == 'dc.title'
+    assert metadata_elem['value'] == 'Test title'
+    assert metadata_elem['language'] == 'en_US'
+
+
+def test_create_metadata_rec():
+    """Test create_metadata_rec function."""
+    metadata_rec = []
+    row = {'title': 'Test title'}
+    mapping_dict = {'dc.title': ['title']}
+    metadata_rec = models.create_metadata_rec(mapping_dict, row, metadata_rec)
+    assert metadata_rec[0]['key'] == 'dc.title'
+    assert metadata_rec[0]['value'] == 'Test title'