Skip to content

Commit 59fc7a6

Browse files
authored
Merge pull request #13 from MITLibraries/metadata-json
metadata json transform
2 parents 257b61c + 3362540 commit 59fc7a6

File tree

3 files changed

+89
-2
lines changed

3 files changed

+89
-2
lines changed

dsaps/cli.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ def main(ctx, url, email, password):
4747
ctx.obj['start_time'] = start_time
4848

4949

50+
@click.group()
def aux():
    # Commands attach to this group via @aux.command(); the group callback
    # itself has nothing to do.
    pass
53+
54+
5055
@main.command()
5156
@click.option('-f', '--field', prompt='Enter the field to be searched',
5257
help='The field to search.')
@@ -106,7 +111,7 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type,
106111
models.elapsed_time(start_time, 'Total runtime:')
107112

108113

109-
@main.command()
114+
@aux.command()
110115
@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
111116
help='The path of the CSV file of metadata.')
112117
@click.option('-f', '--file_path', prompt='Enter the path',
@@ -145,5 +150,47 @@ def reconcile(metadata_csv, file_path, file_type):
145150
models.create_csv_from_list(metadata_matches, 'metadata_matches.csv')
146151

147152

153+
@aux.command()
@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
              help='The path of the CSV file of metadata.')
def metadatajson(metadata_csv):
    """Create a JSON file of metadata records from a metadata CSV file.

    Each CSV row becomes one {'metadata': [...]} item. The output is written
    to the current directory as <CSV base name>.json.
    """
    # Metadata key -> [CSV column] or [CSV column, language].
    mapping_dict = {'fileIdentifier': ['file_identifier'],
                    'dc.contributor.author': ['author name - direct'],
                    'dc.contributor.advisor': ['supervisor(s)'],
                    'dc.date.issued': ['pub date'],
                    'dc.description.abstract': ['Abstract', 'en_US'],
                    'dc.title': ['Title', 'en_US'],
                    'dc.relation.ispartofseries': ['file_identifier']}
    # Elements appended to every record regardless of the row's content.
    static_elems = [
        {'key': 'dc.format.mimetype', 'language': 'en_US',
         'value': 'application/pdf'},
        {'key': 'dc.language.iso', 'language': 'en_US', 'value': 'en_US'},
        # The original adjacent literals lacked a separating space and
        # produced "ComputerScience".
        {'key': 'dc.publisher', 'language': 'en_US',
         'value': 'Massachusetts Institute of Technology. '
                  'Laboratory for Computer Science'},
        {'key': 'dc.rights', 'language': 'en_US',
         'value': 'Educational use permitted'},
        {'key': 'dc.rights.uri', 'language': 'en_US',
         'value': 'http://rightsstatements.org/vocab/InC-EDU/1.0/'},
        {'key': 'dc.type', 'language': 'en_US', 'value': 'Technical Report'},
    ]
    metadata_group = []
    # newline='' is what the csv module expects so that newlines embedded in
    # quoted fields are parsed correctly.
    with open(metadata_csv, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            metadata_rec = models.create_metadata_rec(mapping_dict, row, [])
            # Copy each constant element so records never share dict objects.
            metadata_rec.extend(dict(elem) for elem in static_elems)
            metadata_group.append({'metadata': metadata_rec})
    file_name = os.path.splitext(os.path.basename(metadata_csv))[0]
    with open(f'{file_name}.json', 'w') as f:
        json.dump(metadata_group, f)
191+
192+
193+
cli = click.CommandCollection(sources=[main, aux])
194+
148195
if __name__ == '__main__':
149-
main()
196+
cli()

dsaps/models.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,26 @@ def elapsed_time(start_time, label):
211211
"""Calculate elapsed time."""
212212
td = datetime.timedelta(seconds=time.time() - start_time)
213213
logger.info(f'{label} : {td}')
214+
215+
216+
def metadata_csv(row, key, field, language=None):
    """Build a single metadata element from one column of a CSV row.

    The element always carries 'key' and 'value' (taken from row[field]);
    'language' is included only when a language is supplied.
    """
    value = row[field]
    elem = {'key': key}
    if language is not None:
        elem['language'] = language
    # Assigned last so the key order stays key, (language,) value.
    elem['value'] = value
    return elem
225+
226+
227+
def create_metadata_rec(mapping_dict, row, metadata_rec):
    """Create metadata record from CSV.

    For every entry in mapping_dict (metadata key -> [field] or
    [field, language]) an element is built from the matching column of row
    and appended to metadata_rec, which is also returned. Columns whose
    value is empty are skipped.
    """
    for k, v in mapping_dict.items():
        # A language is only given when the mapping has exactly two entries.
        language = v[1] if len(v) == 2 else None
        metadata_elem = metadata_csv(row, k, v[0], language)
        # csv.DictReader fills columns missing from a short row with None;
        # treat that like an empty string instead of emitting a null value.
        if metadata_elem['value'] not in ('', None):
            metadata_rec.append(metadata_elem)
    return metadata_rec

tests/test_models.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,20 @@ def test_build_file_dict_remote():
155155
# def test_create_csv_from_list():
156156
# """Test create_csv_from_list function."""
157157
# assert False
158+
159+
160+
def test_metadata_csv():
    """Test metadata_csv function."""
    row = {'title': 'Test title'}
    metadata_elem = models.metadata_csv(row, 'dc.title', 'title', 'en_US')
    assert metadata_elem['key'] == 'dc.title'
    assert metadata_elem['value'] == 'Test title'
    assert metadata_elem['language'] == 'en_US'
    # Without a language argument the element must not carry a language key.
    metadata_elem = models.metadata_csv(row, 'dc.title', 'title')
    assert metadata_elem == {'key': 'dc.title', 'value': 'Test title'}
166+
167+
168+
def test_create_metadata_rec():
    """Test create_metadata_rec function."""
    metadata_rec = []
    row = {'title': 'Test title', 'abstract': ''}
    mapping_dict = {'dc.title': ['title'],
                    'dc.description.abstract': ['abstract', 'en_US']}
    metadata_rec = models.create_metadata_rec(mapping_dict, row, metadata_rec)
    assert metadata_rec[0]['key'] == 'dc.title'
    assert metadata_rec[0]['value'] == 'Test title'
    # The empty abstract column must not produce a metadata element.
    assert len(metadata_rec) == 1

0 commit comments

Comments
 (0)