Skip to content

Commit

Permalink
Create DSpace metadata from Crossref data
Browse files Browse the repository at this point in the history
* Add function to create DSpace-compliant metadata from Crossref data
* Add metadata mapping file to config folder
* Add corresponding unit test

* https://mitlibraries.atlassian.net/browse/DLSPP-112
  • Loading branch information
ehanson8 committed Oct 15, 2021
1 parent 7c86acb commit b70f7d5
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 21 deletions.
20 changes: 19 additions & 1 deletion awd/crossref.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

import requests
import smart_open

Expand Down Expand Up @@ -40,7 +42,7 @@ def get_metadata_dict_from_crossref_work(work):
if key == "author":
authors = []
for author in work["author"]:
name = f'{author["family"]}, {author["given"]}'
name = f'{author.get("family")}, {author.get("given")}'
authors.append(name)
value_dict[key] = authors
elif key == "title":
Expand All @@ -51,3 +53,19 @@ def get_metadata_dict_from_crossref_work(work):
else:
value_dict[key] = work[key]
return value_dict


def create_dspace_metadata_from_dict(value_dict, metadata_mapping_path):
    """Create DSpace JSON metadata from metadata dict and a JSON metadata mapping file.

    Args:
        value_dict: Dict of source field names to values. A list value yields one
            metadata entry per list item; any other value yields a single entry.
        metadata_mapping_path: Path to a JSON file holding an object that maps
            source field names to DSpace metadata field names.

    Returns:
        A dict of the form ``{"metadata": [{"key": ..., "value": ...}, ...]}``.
        Entries follow the key order of the mapping file; fields missing from
        either the mapping or ``value_dict`` are omitted.
    """
    # JSON is UTF-8 by specification, so do not depend on the platform's
    # default text encoding when reading the mapping file.
    with open(metadata_mapping_path, "r", encoding="utf-8") as mapping_file:
        metadata_mapping = json.load(mapping_file)
    metadata = []
    for key, dspace_field in metadata_mapping.items():
        if key not in value_dict:
            continue
        value = value_dict[key]
        # Treat a scalar as a one-item list so both cases share one code path.
        items = value if isinstance(value, list) else [value]
        metadata.extend({"key": dspace_field, "value": item} for item in items)
    return {"metadata": metadata}
15 changes: 15 additions & 0 deletions config/metadata_mapping.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"author": "dc.contributor.author",
"container-title": "dc.relation.journal",
"ISSN": "dc.identifier.issn",
"issue": "mit.journal.issue",
"issued": "dc.date.issued",
"language": "dc.langauge",
"original-title": "dc.title.alternative",
"publisher": "dc.publisher",
"short-title": "dc.title.alternative",
"subtitle": "dc.title.alternative",
"title": "dc.title",
"URL": "dc.relation.isversionof",
"volume": "mit.journal.volume"
}
91 changes: 91 additions & 0 deletions tests/fixtures/dspace_metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
{
"metadata": [{
"key": "dc.contributor.author",
"value": "Eivazzadeh‐Keihan, Reza"
},
{
"key": "dc.contributor.author",
"value": "Bahojb Noruzi, Ehsan"
},
{
"key": "dc.contributor.author",
"value": "Khanmohammadi Chenab, Karim"
},
{
"key": "dc.contributor.author",
"value": "Jafari, Amir"
},
{
"key": "dc.contributor.author",
"value": "Radinekiyan, Fateme"
},
{
"key": "dc.contributor.author",
"value": "Hashemi, Seyed Masoud"
},
{
"key": "dc.contributor.author",
"value": "Ahmadpour, Farnoush"
},
{
"key": "dc.contributor.author",
"value": "Behboudi, Ali"
},
{
"key": "dc.contributor.author",
"value": "Mosafer, Jafar"
},
{
"key": "dc.contributor.author",
"value": "Mokhtarzadeh, Ahad"
},
{
"key": "dc.contributor.author",
"value": "Maleki, Ali"
},
{
"key": "dc.contributor.author",
"value": "Hamblin, Michael R."
},
{
"key": "dc.relation.journal",
"value": "Journal of Tissue Engineering and Regenerative Medicine"
},
{
"key": "dc.identifier.issn",
"value": "1932-6254"
},
{
"key": "dc.identifier.issn",
"value": "1932-7005"
},
{
"key": "mit.journal.issue",
"value": "12"
},
{
"key": "dc.date.issued",
"value": "2020-09-30"
},
{
"key": "dc.langauge",
"value": "en"
},
{
"key": "dc.publisher",
"value": "Wiley"
},
{
"key": "dc.title",
"value": "Metal‐based nanoparticles for bone tissue engineering"
},
{
"key": "dc.relation.isversionof",
"value": "http://dx.doi.org/10.1002/term.3131"
},
{
"key": "mit.journal.volume",
"value": "14"
}
]
}
39 changes: 19 additions & 20 deletions tests/test_crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def test_get_dois_from_spreadsheet():
    """Check each DOI yielded from the test spreadsheet against the expected DOI."""
    # Path is relative to the repository root, where the test suite is run.
    dois = crossref.get_dois_from_spreadsheet("tests/fixtures/test.csv")
    for doi in dois:
        assert doi == "10.1002/term.3131"

Expand Down Expand Up @@ -30,23 +30,22 @@ def test_create_dspace_metadata_from_dict_minimum_metadata():
metadata = crossref.create_dspace_metadata_from_dict(
value_dict, "config/metadata_mapping.json"
)
assert value_dict["publisher"] == "Wiley"
assert value_dict["author"] == [
"Eivazzadeh‐Keihan, Reza",
"Bahojb Noruzi, Ehsan",
"Khanmohammadi Chenab, Karim",
"Jafari, Amir",
"Radinekiyan, Fateme",
"Hashemi, Seyed Masoud",
"Ahmadpour, Farnoush",
"Behboudi, Ali",
"Mosafer, Jafar",
"Mokhtarzadeh, Ahad",
"Maleki, Ali",
"Hamblin, Michael R.",
assert metadata["metadata"] == [
{
"key": "dc.title",
"value": "Metal‐based nanoparticles for bone tissue engineering",
},
{
"key": "dc.relation.isversionof",
"value": "http://dx.doi.org/10.1002/term.3131",
},
]
assert value_dict["URL"] == "http://dx.doi.org/10.1002/term.3131"
assert value_dict["container-title"] == [
"Journal of Tissue Engineering and Regenerative Medicine"
]
assert value_dict["issued"] == "2020-09-30"


def test_transform_dict_with_metadata_mapping_full_metadata(
    crossref_value_dict, dspace_metadata
):
    """Compare metadata built from crossref_value_dict against dspace_metadata."""
    mapping_path = "config/metadata_mapping.json"
    result = crossref.create_dspace_metadata_from_dict(
        crossref_value_dict, mapping_path
    )
    assert result == dspace_metadata

0 comments on commit b70f7d5

Please sign in to comment.