Skip to content

Commit 62520ff

Browse files
committed
metadata json transform
1 parent 257b61c commit 62520ff

File tree

3 files changed

+99
-0
lines changed

3 files changed

+99
-0
lines changed

dsaps/cli.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,5 +145,50 @@ def reconcile(metadata_csv, file_path, file_type):
145145
models.create_csv_from_list(metadata_matches, 'metadata_matches.csv')
146146

147147

148+
@main.command()
@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
              help='The path of the CSV file of metadata.')
def metadatajson(metadata_csv):
    """Transform a metadata CSV file into a JSON file of item metadata.

    Each CSV row becomes one ``{'metadata': [...]}`` item. The JSON file is
    written to the current working directory and named after the CSV file
    (``<csv basename>.json``).
    """
    with open(metadata_csv) as csvfile:
        reader = csv.DictReader(csvfile)
        metadata_group = []
        for row in reader:
            # Elements are appended in place by the models helpers, so the
            # call order below is the element order in the output.
            metadata_rec = []
            models.metadata_csv(row, metadata_rec, 'fileIdentifier',
                                'file_identifier', '', '')
            models.metadata_csv(row, metadata_rec, 'dc.contributor.author',
                                'author name - direct', '', '')
            models.metadata_csv(row, metadata_rec, 'dc.contributor.advisor',
                                'supervisor(s)', '', '')
            models.metadata_csv(row, metadata_rec, 'dc.date.issued',
                                'pub date', '', '')
            models.metadata_csv(row, metadata_rec, 'dc.description.abstract',
                                'Abstract', 'en_US', '')
            models.metadata_direct(metadata_rec, 'dc.format.mimetype',
                                   'application/pdf', 'en_US')
            models.metadata_direct(metadata_rec, 'dc.language.iso', 'en_US',
                                   'en_US')
            models.metadata_direct(metadata_rec, 'dc.publisher',
                                   'Massachusetts Institute of Technology. '
                                   'Laboratory for Computer Science', 'en_US')
            # The series value is read from the same CSV column as
            # 'fileIdentifier' above.
            models.metadata_csv(row, metadata_rec,
                                'dc.relation.ispartofseries',
                                'file_identifier', 'en_US', '')
            models.metadata_direct(metadata_rec, 'dc.rights',
                                   'Educational use permitted', 'en_US')
            models.metadata_direct(metadata_rec, 'dc.rights.uri',
                                   'http://rightsstatements.org/vocab/'
                                   'InC-EDU/1.0/', 'en_US')
            models.metadata_csv(row, metadata_rec, 'dc.title', 'Title',
                                'en_US', '')
            models.metadata_direct(metadata_rec, 'dc.type', 'Technical Report',
                                   'en_US')
            item = {'metadata': metadata_rec}
            metadata_group.append(item)
    file_name = os.path.splitext(os.path.basename(metadata_csv))[0]
    # Use a context manager so the output is flushed and the handle closed
    # deterministically, even if json.dump raises.
    with open(f'{file_name}.json', 'w') as f:
        json.dump(metadata_group, f)
191+
192+
148193
if __name__ == '__main__':
149194
main()

dsaps/models.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,39 @@ def elapsed_time(start_time, label):
211211
"""Calculate elapsed time."""
212212
td = datetime.timedelta(seconds=time.time() - start_time)
213213
logger.info(f'{label} : {td}')
214+
215+
216+
def metadata_csv(row, metadata_rec, key, field, language, delimiter):
    """Create metadata elements from CSV, including fields with delimiters.

    Reads ``row[field]`` and appends one element dict per value to
    ``metadata_rec`` (the list is mutated in place; nothing is returned).

    Args:
        row: Mapping of CSV column name to cell value.
        metadata_rec: List that receives the new element dicts.
        key: Value stored under ``'key'`` in each element.
        field: Column name to read from ``row``.
        language: Value stored under ``'language'``; when ``''`` the
            ``'language'`` entry is omitted from the element.
        delimiter: Separator used to split the cell into several values;
            when ``''`` the cell is used as a single value.

    Raises:
        KeyError: If ``field`` is not present in ``row``.
    """
    raw_value = row[field]
    if raw_value == '':
        # Empty cells produce no metadata element.
        return
    if delimiter != '' and delimiter in raw_value:
        # Drop empty segments (doubled or trailing delimiter) so a split cell
        # follows the same "no empty values" rule as a whole cell.
        values = [part for part in raw_value.split(delimiter) if part != '']
    else:
        values = [raw_value]
    for value in values:
        if language != '':
            metadata_elem = {'key': key, 'language': language, 'value': value}
        else:
            metadata_elem = {'key': key, 'value': value}
        metadata_rec.append(metadata_elem)
240+
241+
242+
def metadata_direct(metadata_rec, key, value, language):
    """Append a metadata element built from an explicitly supplied value.

    The element always carries ``'key'`` and ``'value'``; a ``'language'``
    entry is included only when ``language`` is not the empty string.
    ``metadata_rec`` is mutated in place and nothing is returned.
    """
    element = {'key': key}
    if language != '':
        # Inserted before 'value' so the key order is key, language, value.
        element['language'] = language
    element['value'] = value
    metadata_rec.append(element)

tests/test_models.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,21 @@ def test_build_file_dict_remote():
155155
# def test_create_csv_from_list():
156156
# """Test create_csv_from_list function."""
157157
# assert False
158+
159+
160+
def test_metadata_csv():
    """Check that metadata_csv builds an element from a CSV row value."""
    elements = []
    csv_row = {'title': 'Test title'}
    models.metadata_csv(csv_row, elements, 'dc.title', 'title', 'en_US', '')
    first = elements[0]
    assert first['key'] == 'dc.title'
    assert first['value'] == 'Test title'
167+
168+
169+
def test_metadata_direct():
    """Check that metadata_direct builds an element from a given value."""
    elements = []
    rights_statement = 'No one may ever view this content.'
    models.metadata_direct(elements, 'dc.rights', rights_statement, 'en_US')
    first = elements[0]
    assert first['key'] == 'dc.rights'
    assert first['value'] == 'No one may ever view this content.'

0 commit comments

Comments
 (0)