Skip to content

Commit

Permalink
Datacite & JPAL transform
Browse files Browse the repository at this point in the history
* Update Makefile test command to include uncovered lines
* Add new dependencies
* Add fixtures for Datacite XML records and TIMDEX JSON records
* Add unit tests for all new functions
* Add config.py with namespaces constant
* Add Datacite module with transform and support functions
* Add TimdexRecord class with methods and validator functions for class fields
* Add JPAL module with transform function
  • Loading branch information
ehanson8 committed Apr 19, 2022
1 parent c784458 commit ef91cb8
Show file tree
Hide file tree
Showing 17 changed files with 4,442 additions and 11 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ mypy:
pipenv run mypy transmogrifier

test:
pipenv run pytest --cov=transmogrifier
pipenv run pytest --cov-report term-missing --cov=transmogrifier
3 changes: 3 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ name = "pypi"
[packages]
click = "*"
sentry-sdk = "*"
attrs = "*"
types-requests = "*"
defusedxml = "*"

[dev-packages]
black = "*"
Expand Down
50 changes: 40 additions & 10 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

81 changes: 81 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,88 @@
import json

import pytest
from click.testing import CliRunner
from defusedxml import ElementTree as ET


@pytest.fixture()
def runner():
    """Provide a new Click ``CliRunner`` instance to the requesting test."""
    cli_runner = CliRunner()
    return cli_runner


@pytest.fixture()
def timdex_record_generic_full():
    """Return a generic TIMDEX record dict with optional fields filled in.

    Besides the id, title, identifiers, source, and source link, the record
    carries one contributor, one date range, one note, a content type, and
    publication information.
    """
    author = {
        "value": "Smith, Jane",
        "kind": "author",
        "identifier": "45678",
        "affiliation": "University",
        "mit_affiliated": True,
    }
    date_range = {"range": {"gte": 1901, "lte": 1970}}
    resource_type_note = {"value": "Survey Data", "kind": "ResourceType"}

    # Start from the core fields, then layer on the optional ones in the
    # same key order as a single literal would produce.
    record = {
        "timdex_record_id": "123",
        "title": "Dataset 1",
        "identifiers": [{"value": "123", "kind": "DOI"}],
        "source": "Data Provider",
        "source_link": "example://example.example",
    }
    record["contributors"] = [author]
    record["dates"] = [date_range]
    record["notes"] = [resource_type_note]
    record["content_type"] = ["Dataset"]
    record["publication_information"] = ["Harvard Dataverse"]
    return record


@pytest.fixture()
def timdex_record_generic_minimal():
    """Return a generic TIMDEX record dict holding only five basic fields.

    The fields are the record id, title, a single identifier (value only),
    source, and source link.
    """
    identifier = {"value": "123"}
    record = dict(
        timdex_record_id="123",
        title="Dataset 1",
        identifiers=[identifier],
        source="Data Provider",
        source_link="example://example.example",
    )
    return record


@pytest.fixture()
def datacite_record_jpal_full():
    """Return the full JPAL Datacite XML fixture serialized as bytes.

    The fixture file is parsed with defusedxml and its root element is
    re-serialized with ``ET.tostring`` (``encoding="utf8"``), so the result
    is a ``bytes`` XML document.
    """
    # Open in binary mode so the parser honors the encoding declared in the
    # XML prolog instead of the platform's locale encoding, and use a context
    # manager so the file handle is closed deterministically.
    with open("tests/fixtures/datacite_record_jpal_full.xml", "rb") as xml_file:
        tree = ET.parse(xml_file)
    xml_template = tree.getroot()
    return ET.tostring(
        xml_template,
        encoding="utf8",
        method="xml",
    )


@pytest.fixture()
def datacite_record_jpal_minimal():
    """Return the minimal JPAL Datacite XML fixture serialized as bytes.

    The fixture file is parsed with defusedxml and its root element is
    re-serialized with ``ET.tostring`` (``encoding="utf8"``), so the result
    is a ``bytes`` XML document.
    """
    # Open in binary mode so the parser honors the encoding declared in the
    # XML prolog instead of the platform's locale encoding, and use a context
    # manager so the file handle is closed deterministically.
    with open("tests/fixtures/datacite_record_jpal_minimal.xml", "rb") as xml_file:
        tree = ET.parse(xml_file)
    xml_template = tree.getroot()
    return ET.tostring(
        xml_template,
        encoding="utf8",
        method="xml",
    )


@pytest.fixture()
def datacite_records_jpal_full_set():
    """Return the JPAL Datacite full-harvest XML fixture serialized as bytes.

    The fixture file is parsed with defusedxml and its root element is
    re-serialized with ``ET.tostring`` (``encoding="utf8"``), so the result
    is a ``bytes`` XML document.
    """
    # Open in binary mode so the parser honors the encoding declared in the
    # XML prolog instead of the platform's locale encoding, and use a context
    # manager so the file handle is closed deterministically.
    with open("tests/fixtures/jpal-datacite-full-harvest.xml", "rb") as xml_file:
        tree = ET.parse(xml_file)
    xml_template = tree.getroot()
    return ET.tostring(
        xml_template,
        encoding="utf8",
        method="xml",
    )


@pytest.fixture()
def timdex_record_jpal_full():
    """Return the full JPAL TIMDEX record fixture decoded from its JSON file."""
    # A context manager closes the file handle promptly (avoiding a
    # ResourceWarning), and the explicit encoding keeps decoding independent
    # of the platform locale.
    with open(
        "tests/fixtures/timdex_record_jpal_full.json", encoding="utf-8"
    ) as json_file:
        return json.load(json_file)


@pytest.fixture()
def timdex_record_jpal_minimal():
    """Return the minimal JPAL TIMDEX record fixture decoded from its JSON file."""
    # A context manager closes the file handle promptly (avoiding a
    # ResourceWarning), and the explicit encoding keeps decoding independent
    # of the platform locale.
    with open(
        "tests/fixtures/timdex_record_jpal_minimal.json", encoding="utf-8"
    ) as json_file:
        return json.load(json_file)
106 changes: 106 additions & 0 deletions tests/fixtures/datacite_record_jpal_full.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<header>
<identifier>doi:10.7910/DVN/19PPE7</identifier>
<datestamp>2022-03-26T06:04:55Z</datestamp>
<setSpec>Jameel_Poverty_Action_Lab</setSpec>
<setSpec>IQSS</setSpec>
</header>
<metadata>
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
<identifier identifierType="DOI">10.7910/DVN/19PPE7</identifier>
<creators>
<creator>
<creatorName nameType="Personal">Banerji, Rukmini</creatorName>
<givenName>Rukmini</givenName>
<familyName>Banerji</familyName>
<affiliation>Pratham and ASER Centre</affiliation>
</creator>
<creator>
<creatorName nameType="Personal">Berry, James</creatorName>
<givenName>James</givenName>
<familyName>Berry</familyName>
<affiliation>University of Delaware</affiliation>
</creator>
<creator>
<creatorName nameType="Personal">Shotland, Marc</creatorName>
<givenName>Marc</givenName>
<familyName>Shotland</familyName>
<affiliation>Abdul Latif Jameel Poverty Action Lab</affiliation>
</creator>
</creators>
<titles>
<title>The Impact of Maternal Literacy and Participation Programs: Evidence from a Randomized Evaluation in India</title>
</titles>
<publisher>Harvard Dataverse</publisher>
<publicationYear>2017</publicationYear>
<subjects>
<subject>Social Sciences</subject>
<subject>Adult education, education inputs, field experiments</subject>
</subjects>
<contributors>
<contributor contributorType="ContactPerson">
<contributorName nameType="Personal">Banerji, Rukmini</contributorName>
<givenName>Rukmini</givenName>
<familyName>Banerji</familyName>
<affiliation>Pratham and ASER Centre</affiliation>
</contributor>
<contributor contributorType="ContactPerson">
<contributorName nameType="Personal">Berry, James</contributorName>
<givenName>James</givenName>
<familyName>Berry</familyName>
<affiliation>University of Delaware</affiliation>
</contributor>
<contributor contributorType="ContactPerson">
<contributorName nameType="Personal">Shotland, Marc</contributorName>
<givenName>Marc</givenName>
<familyName>Shotland</familyName>
<affiliation>Abdul Latif Jameel Poverty Action Lab</affiliation>
</contributor>
</contributors>
<dates>
<date dateType="Submitted">2017-02-27</date>
<date dateType="Updated">2019-06-24</date>
</dates>
<resourceType resourceTypeGeneral="Dataset">Survey Data</resourceType>
<relatedIdentifiers>
<relatedIdentifier relationType="IsCitedBy" relatedIdentifierType="DOI">10.1257/app.20150390</relatedIdentifier>
</relatedIdentifiers>
<sizes>
<size>124903</size>
<size>48958</size>
<size>199070</size>
<size>186674</size>
<size>139605</size>
<size>97304</size>
<size>9907</size>
<size>178534602</size>
<size>4032103</size>
<size>43589</size>
<size>15697</size>
</sizes>
<formats>
<format>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</format>
<format>application/pdf</format>
<format>application/pdf</format>
<format>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</format>
<format>application/pdf</format>
<format>application/x-stata-syntax</format>
<format>application/x-stata</format>
<format>application/x-stata</format>
<format>application/zip</format>
<format>application/pdf</format>
<format>application/pdf</format>
</formats>
<version>1.2</version>
<rightsList>
<rights rightsURI="info:eu-repo/semantics/openAccess" />
<rights rightsURI="http://creativecommons.org/publicdomain/zero/1.0">CC0 1.0</rights>
</rightsList>
<descriptions>
<description descriptionType="Abstract">Using a randomized field experiment in India, we evaluate the effectiveness of adult literacy and parental involvement interventions in improving children's learning. Households were assigned to receive either adult literacy (language and math) classes for mothers, training for mothers on how to enhance their children's learning at home, or a combination of the two programs. All three interventions had significant but modest impacts on childrens math scores. The interventions also increased mothers' test scores in both language and math, as well as a range of other outcomes reflecting greater involvement of mothers in their children's education.</description>
</descriptions>
<geoLocations />
</resource>
</metadata>
</record>
17 changes: 17 additions & 0 deletions tests/fixtures/datacite_record_jpal_minimal.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<header>
<identifier>doi:10.7910/DVN/19PPE7</identifier>
<datestamp>2022-03-26T06:04:55Z</datestamp>
<setSpec>Jameel_Poverty_Action_Lab</setSpec>
<setSpec>IQSS</setSpec>
</header>
<metadata>
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
<identifier identifierType="DOI">10.7910/DVN/19PPE7</identifier>
<titles>
<title>The Impact of Maternal Literacy and Participation Programs: Evidence from a Randomized Evaluation in India</title>
</titles>
</resource>
</metadata>
</record>
Loading

0 comments on commit ef91cb8

Please sign in to comment.