diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java
index 8882742d20c..d02e8f72838 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java
@@ -18,6 +18,7 @@
 import java.io.FileReader;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import javax.ejb.EJB;
 import javax.ejb.EJBException;
@@ -32,10 +33,11 @@
 import javax.ws.rs.PathParam;
 import javax.ws.rs.core.Response;
 
-import org.apache.commons.io.IOUtils;
+import edu.harvard.iq.dataverse.util.BundleUtil;
 import org.apache.commons.lang.StringUtils;
 import static edu.harvard.iq.dataverse.util.json.JsonPrinter.asJsonArray;
 import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
+
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import javax.persistence.NoResultException;
@@ -232,8 +234,7 @@ public Response loadNAControlledVocabularyValue() {
         }
     }
 
-    private enum HeaderType {
-
+    public enum HeaderType {
         METADATABLOCK, DATASETFIELD, CONTROLLEDVOCABULARY
     }
 
@@ -298,12 +299,20 @@ public Response loadDatasetFields(File file) {
             alr.setActionResult(ActionLogRecord.Result.BadRequest);
             alr.setInfo( alr.getInfo() + "// file not found");
             return error(Status.EXPECTATION_FAILED, "File not found");
-            
+
+        } catch (ArrayIndexOutOfBoundsException e) {
+            String message = getArrayIndexOutOfBoundMessage(header, lineNumber, e);
+            logger.log(Level.WARNING, message, e);
+            alr.setActionResult(ActionLogRecord.Result.InternalError);
+            alr.setInfo(alr.getInfo() + "// " + message);
+            return error(Status.INTERNAL_SERVER_ERROR, message);
+
         } catch (Exception e) {
-            logger.log(Level.WARNING, "Error parsing dataset fields:" + e.getMessage(), e);
+            String message = getGeneralErrorMessage(header, lineNumber, e.getMessage());
+            logger.log(Level.WARNING, message, e);
             alr.setActionResult(ActionLogRecord.Result.InternalError);
-            alr.setInfo( alr.getInfo() + "// " + e.getMessage());
-            return error(Status.INTERNAL_SERVER_ERROR, e.getMessage());
+            alr.setInfo( alr.getInfo() + "// " + message);
+            return error(Status.INTERNAL_SERVER_ERROR, message);
 
         } finally {
             if (br != null) {
@@ -319,6 +328,114 @@ public Response loadDatasetFields(File file) {
         return ok( Json.createObjectBuilder().add("added", responseArr) );
     }
 
+    /** Matches the first (optionally negative) integer in an exception message. */
+    private static final java.util.regex.Pattern FIRST_INTEGER =
+            java.util.regex.Pattern.compile("-?\\d+");
+
+    /**
+     * Build a general error message naming the section of the TSV file and
+     * the line number at which loading failed.
+     *
+     * @param header the section being parsed; {@code null} is rendered as "null"
+     * @param lineNumber the line number reported to the user
+     * @param message the underlying error detail
+     * @return the message built from the GeneralErrorMessage bundle entry
+     */
+    public String getGeneralErrorMessage(HeaderType header, int lineNumber, String message) {
+        List<String> arguments = new ArrayList<>();
+        // String.valueOf tolerates a null header; for an enum constant it yields its name.
+        arguments.add(String.valueOf(header));
+        arguments.add(String.valueOf(lineNumber));
+        arguments.add(message);
+        return BundleUtil.getStringFromBundle("api.admin.datasetfield.load.GeneralErrorMessage", arguments);
+    }
+
+    /**
+     * Turn an ArrayIndexOutOfBoundsException into an informative error message
+     * that names the missing column.
+     *
+     * The offending index is read from the exception message. Some JVMs put
+     * only the index there ("4"), others a sentence such as
+     * "Index 4 out of bounds for length 4", so the first integer found is used.
+     * When no index can be extracted, or no column name is known for it, the
+     * general error message is returned instead of failing inside the handler.
+     *
+     * @param header the section being parsed
+     * @param lineNumber the line number reported to the user
+     * @param e the exception raised while reading the line
+     * @return the message naming the missing column, or the general message
+     */
+    public String getArrayIndexOutOfBoundMessage(HeaderType header,
+                                                 int lineNumber,
+                                                 ArrayIndexOutOfBoundsException e) {
+
+        List<String> columns = getColumnsByHeader(header);
+        int wrongIndex = extractFirstInteger(e.getMessage());
+
+        // Index 0 is the leading "#section" cell, so named columns start at index 1.
+        if (wrongIndex < 1 || wrongIndex > columns.size()) {
+            return getGeneralErrorMessage(header, lineNumber, e.getMessage());
+        }
+
+        List<String> arguments = new ArrayList<>();
+        arguments.add(header.name());
+        arguments.add(String.valueOf(lineNumber));
+        arguments.add(columns.get(wrongIndex - 1));
+        arguments.add(String.valueOf(wrongIndex + 1));
+        return BundleUtil.getStringFromBundle(
+            "api.admin.datasetfield.load.ArrayIndexOutOfBoundMessage",
+            arguments
+        );
+    }
+
+    /**
+     * Extract the first integer that appears in the given text.
+     *
+     * @param text the text to scan; may be {@code null}
+     * @return the first integer found, or -1 when there is none
+     */
+    private static int extractFirstInteger(String text) {
+        if (text == null) {
+            return -1;
+        }
+        java.util.regex.Matcher matcher = FIRST_INTEGER.matcher(text);
+        if (!matcher.find()) {
+            return -1;
+        }
+        try {
+            return Integer.parseInt(matcher.group());
+        } catch (NumberFormatException ex) {
+            // Digits that overflow an int cannot be a usable column index.
+            return -1;
+        }
+    }
+
+    /**
+     * Get the column names of a section in file order, excluding the leading
+     * "#section" cell.
+     *
+     * @param header the section type; may be {@code null}
+     * @return the column names, or an empty list when the section is unknown
+     */
+    private List<String> getColumnsByHeader(HeaderType header) {
+        if (header == null) {
+            return new ArrayList<>();
+        }
+        switch (header) {
+            case METADATABLOCK:
+                return Arrays.asList("name", "dataverseAlias", "displayName", "blockURI");
+            case DATASETFIELD:
+                return Arrays.asList("name", "title", "description", "watermark",
+                    "fieldType", "displayOrder", "displayFormat", "advancedSearchField",
+                    "allowControlledVocabulary", "allowmultiples", "facetable",
+                    "displayoncreate", "required", "parent", "metadatablock_id", "termURI");
+            case CONTROLLEDVOCABULARY:
+                return Arrays.asList("DatasetField", "Value", "identifier", "displayOrder");
+            default:
+                return new ArrayList<>();
+        }
+    }
+
     private String parseMetadataBlock(String[] values) {
         //Test to see if it exists by name
         MetadataBlock mdb = metadataBlockService.findByName(values[1]);
diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
index 181f26446cb..6de8c5f4a94 100755
--- a/src/main/java/propertyFiles/Bundle.properties
+++ b/src/main/java/propertyFiles/Bundle.properties
@@ -2510,3 +2510,7 @@ externaltools.dct.displayname=Data Curation Tool
 externaltools.dct.description=Data Curation Tool for curation of variables
 externaltools.explorer.displayname=Data Explorer
 externaltools.explorer.description=The Data Explorer provides a GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis.
+
+# api/admin/datasetfield/load
+api.admin.datasetfield.load.ArrayIndexOutOfBoundMessage=Error parsing metadata block in {0} part, line #{1}: missing ''{2}'' column (#{3})
+api.admin.datasetfield.load.GeneralErrorMessage=Error parsing metadata block in {0} part, line #{1}: {2}
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java
index 6c7eb1391f3..e944ca5b2bf 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java
@@ -8,15 +8,24 @@
 import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.GitHubOAuth2AP;
 import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.OrcidOAuth2AP;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 
 import static javax.ws.rs.core.Response.Status.FORBIDDEN;
 import static javax.ws.rs.core.Response.Status.BAD_REQUEST;
 import org.junit.Test;
 import org.junit.BeforeClass;
+
+import java.util.Map;
 import java.util.UUID;
+import java.util.logging.Logger;
 import static javax.ws.rs.core.Response.Status.CREATED;
+import static javax.ws.rs.core.Response.Status.INTERNAL_SERVER_ERROR;
 import static javax.ws.rs.core.Response.Status.OK;
 import static javax.ws.rs.core.Response.Status.UNAUTHORIZED;
 import static junit.framework.Assert.assertEquals;
@@ -27,6 +36,8 @@
 
 public class AdminIT {
 
+    private static final Logger logger = Logger.getLogger(AdminIT.class.getCanonicalName());
+
     @BeforeClass
     public static void setUp() {
         RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();
@@ -671,4 +682,70 @@ public void testMigrateHDLToDOI() {
                 .statusCode(OK.getStatusCode());
     }
 
+    @Test
+    public void testLoadMetadataBlock_NoErrorPath() {
+        Response createUser = UtilIT.createRandomUser();
+        String apiToken = UtilIT.getApiTokenFromResponse(createUser);
+
+        byte[] updatedContent = null;
+        try {
+            updatedContent = Files.readAllBytes(Paths.get("src/test/resources/tsv/citation.tsv"));
+        } catch (IOException e) {
+            logger.warning(e.getMessage());
+            org.junit.Assert.fail("Could not read the TSV fixture: " + e.getMessage());
+        }
+        Response response = UtilIT.loadMetadataBlock(apiToken, updatedContent);
+        assertEquals(200, response.getStatusCode());
+        response.then().assertThat().statusCode(OK.getStatusCode());
+
+        String body = response.getBody().asString();
+        String status = JsonPath.from(body).getString("status");
+        assertEquals("OK", status);
+
+        Map<String, List<Map<String, String>>> data = JsonPath.from(body).getMap("data");
+        assertEquals(1, data.size());
+        List<Map<String, String>> addedElements = data.get("added");
+        assertEquals(321, addedElements.size());
+
+        Map<String, Integer> statistics = new HashMap<>();
+        for (Map<String, String> unit : addedElements) {
+            assertEquals(2, unit.size());
+            assertTrue(unit.containsKey("name"));
+            assertTrue(unit.containsKey("type"));
+            String type = unit.get("type");
+            statistics.merge(type, 1, Integer::sum);
+        }
+
+        assertEquals(3, statistics.size());
+        assertEquals(1, (int) statistics.get("MetadataBlock"));
+        assertEquals(78, (int) statistics.get("DatasetField"));
+        assertEquals(242, (int) statistics.get("Controlled Vocabulary"));
+    }
+
+    @Test
+    public void testLoadMetadataBlock_ErrorHandling() {
+        Response createUser = UtilIT.createRandomUser();
+        String apiToken = UtilIT.getApiTokenFromResponse(createUser);
+
+        byte[] updatedContent = null;
+        try {
+            updatedContent = Files.readAllBytes(Paths.get("src/test/resources/tsv/test.tsv"));
+        } catch (IOException e) {
+            logger.warning(e.getMessage());
+            org.junit.Assert.fail("Could not read the TSV fixture: " + e.getMessage());
+        }
+        Response response = UtilIT.loadMetadataBlock(apiToken, updatedContent);
+        assertEquals(500, response.getStatusCode());
+        response.then().assertThat().statusCode(INTERNAL_SERVER_ERROR.getStatusCode());
+
+        String body = response.getBody().asString();
+        String status = JsonPath.from(body).getString("status");
+        assertEquals("ERROR", status);
+
+        String message = JsonPath.from(body).getString("message");
+        assertEquals(
+            "Error parsing metadata block in DATASETFIELD part, line #5: missing 'watermark' column (#5)",
+            message
+        );
+    }
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApiTest.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApiTest.java
new file mode 100644
index 00000000000..d6257063bd2
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApiTest.java
@@ -0,0 +1,62 @@
+package edu.harvard.iq.dataverse.api;
+
+import edu.harvard.iq.dataverse.util.BundleUtil;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+public class DatasetFieldServiceApiTest {
+
+    @Test
+    public void testArrayIndexOutOfBoundMessageBundle() {
+        List<String> arguments = new ArrayList<>();
+        arguments.add("DATASETFIELD");
+        arguments.add(String.valueOf(5));
+        arguments.add("watermark");
+        arguments.add(String.valueOf(4 + 1));
+
+        String bundle = "api.admin.datasetfield.load.ArrayIndexOutOfBoundMessage";
+        String message = BundleUtil.getStringFromBundle(bundle, arguments);
+        assertEquals(
+            "Error parsing metadata block in DATASETFIELD part, line #5: missing 'watermark' column (#5)",
+            message
+        );
+    }
+
+    @Test
+    public void testGeneralErrorMessageBundle() {
+        List<String> arguments = new ArrayList<>();
+        arguments.add("DATASETFIELD");
+        arguments.add(String.valueOf(5));
+        arguments.add("some error message");
+        String bundle = "api.admin.datasetfield.load.GeneralErrorMessage";
+        String message = BundleUtil.getStringFromBundle(bundle, arguments);
+        assertEquals(
+            "Error parsing metadata block in DATASETFIELD part, line #5: some error message",
+            message
+        );
+    }
+
+    @Test
+    public void testGetArrayIndexOutOfBoundMessage() {
+        DatasetFieldServiceApi api = new DatasetFieldServiceApi();
+        String message = api.getArrayIndexOutOfBoundMessage(DatasetFieldServiceApi.HeaderType.DATASETFIELD, 5, new ArrayIndexOutOfBoundsException("4"));
+        assertEquals(
+            "Error parsing metadata block in DATASETFIELD part, line #5: missing 'watermark' column (#5)",
+            message
+        );
+    }
+
+    @Test
+    public void testGetGeneralErrorMessage() {
+        DatasetFieldServiceApi api = new DatasetFieldServiceApi();
+        String message = api.getGeneralErrorMessage(DatasetFieldServiceApi.HeaderType.DATASETFIELD, 5, "some error");
+        assertEquals(
+            "Error parsing metadata block in DATASETFIELD part, line #5: some error",
+            message
+        );
+    }
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
index fbc4e87b3ac..adfd439cde9 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
@@ -1368,6 +1368,14 @@ static Response getDatasetThumbnailMetadata(Integer datasetId, String apiToken)
                 .get("/api/admin/datasets/thumbnailMetadata/" + datasetId);
     }
 
+    static Response loadMetadataBlock(String apiToken, byte[] body) {
+        return given()
+                .header(API_TOKEN_HTTP_HEADER, apiToken)
+                .contentType("text/tab-separated-values; charset=utf-8")
+                .body(body)
+                .post("/api/admin/datasetfield/load");
+    }
+
     static Response useThumbnailFromDataFile(String datasetPersistentId, long dataFileId1, String apiToken) {
         return given()
                 .header(API_TOKEN_HTTP_HEADER, apiToken)
diff --git a/src/test/resources/tsv/citation.tsv b/src/test/resources/tsv/citation.tsv
new file mode 100644
index 00000000000..3aa93d67aa3
--- /dev/null
+++ b/src/test/resources/tsv/citation.tsv
@@ -0,0 +1,324 @@
+#metadataBlock name dataverseAlias displayName blockURI
+ citation Citation Metadata https://dataverse.org/schema/citation/
+#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent 
metadatablock_id termURI + title Title Full title by which the Dataset is known. Enter title... text 0 TRUE FALSE FALSE FALSE TRUE TRUE citation http://purl.org/dc/terms/title + subtitle Subtitle A secondary title used to amplify or state certain limitations on the main title. text 1 FALSE FALSE FALSE FALSE FALSE FALSE citation + alternativeTitle Alternative Title A title by which the work is commonly referred, or an abbreviation of the title. text 2 FALSE FALSE FALSE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative + alternativeURL Alternative URL A URL where the dataset can be viewed, such as a personal or project website. Enter full URL, starting with http:// url 3 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE citation https://schema.org/distribution + otherId Other ID Another unique identifier that identifies this Dataset (e.g., producer's or another repository's number). none 4 : FALSE FALSE TRUE FALSE FALSE FALSE citation + otherIdAgency Agency Name of agency which generated this identifier. text 5 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation + otherIdValue Identifier Other identifier that corresponds to this Dataset. text 6 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation + author Author The person(s), corporate body(ies), or agency(ies) responsible for creating the work. none 7 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/creator + authorName Name The author's Family Name, Given Name or the name of the organization responsible for this Dataset. FamilyName, GivenName or Organization text 8 #VALUE TRUE FALSE FALSE TRUE TRUE TRUE author citation + authorAffiliation Affiliation The organization with which the author is affiliated. text 9 (#VALUE) TRUE FALSE FALSE TRUE TRUE FALSE author citation + authorIdentifierScheme Identifier Scheme Name of the identifier scheme (ORCID, ISNI). 
text 10 - #VALUE: FALSE TRUE FALSE FALSE TRUE FALSE author citation http://purl.org/spar/datacite/AgentIdentifierScheme + authorIdentifier Identifier Uniquely identifies an individual author or organization, according to various schemes. text 11 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE author citation http://purl.org/spar/datacite/AgentIdentifier + datasetContact Contact The contact(s) for this Dataset. none 12 FALSE FALSE TRUE FALSE TRUE FALSE citation + datasetContactName Name The contact's Family Name, Given Name or the name of the organization. FamilyName, GivenName or Organization text 13 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE datasetContact citation + datasetContactAffiliation Affiliation The organization with which the contact is affiliated. text 14 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE datasetContact citation + datasetContactEmail E-mail The e-mail address(es) of the contact(s) for the Dataset. This will not be displayed. email 15 #EMAIL FALSE FALSE FALSE FALSE TRUE TRUE datasetContact citation + dsDescription Description A summary describing the purpose, nature, and scope of the Dataset. none 16 FALSE FALSE TRUE FALSE TRUE FALSE citation + dsDescriptionValue Text A summary describing the purpose, nature, and scope of the Dataset. textbox 17 #VALUE TRUE FALSE FALSE FALSE TRUE TRUE dsDescription citation + dsDescriptionDate Date In cases where a Dataset contains more than one description (for example, one might be supplied by the data producer and another prepared by the data repository where the data are deposited), the date attribute is used to distinguish between the two descriptions. The date attribute follows the ISO convention of YYYY-MM-DD. YYYY-MM-DD date 18 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE dsDescription citation + subject Subject Domain-specific Subject Categories that are topically relevant to the Dataset. 
text 19 TRUE TRUE TRUE TRUE TRUE TRUE citation http://purl.org/dc/terms/subject + keyword Keyword Key terms that describe important aspects of the Dataset. none 20 FALSE FALSE TRUE FALSE TRUE FALSE citation + keywordValue Term Key terms that describe important aspects of the Dataset. Can be used for building keyword indexes and for classification and retrieval purposes. A controlled vocabulary can be employed. The vocab attribute is provided for specification of the controlled vocabulary in use, such as LCSH, MeSH, or others. The vocabURI attribute specifies the location for the full controlled vocabulary. text 21 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE keyword citation + keywordVocabulary Vocabulary For the specification of the keyword controlled vocabulary in use, such as LCSH, MeSH, or others. text 22 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + keywordVocabularyURI Vocabulary URL Keyword vocabulary URL points to the web presence that describes the keyword vocabulary, if appropriate. Enter an absolute URL where the keyword vocabulary web site is found, such as http://www.my.org. Enter full URL, starting with http:// url 23 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + topicClassification Topic Classification The classification field indicates the broad important topic(s) and subjects that the data cover. Library of Congress subject terms may be used here. none 24 FALSE FALSE TRUE FALSE FALSE FALSE citation + topicClassValue Term Topic or Subject term that is relevant to this Dataset. text 25 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE topicClassification citation + topicClassVocab Vocabulary Provided for specification of the controlled vocabulary in use, e.g., LCSH, MeSH, etc. text 26 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + topicClassVocabURI Vocabulary URL Specifies the URL location for the full controlled vocabulary. 
Enter full URL, starting with http:// url 27 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + publication Related Publication Publications that use the data from this Dataset. The full list of Related Publications will be displayed on the metadata tab. none 28 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy + publicationCitation Citation The full bibliographic citation for this related publication. textbox 29 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation + publicationIDType ID Type The type of digital identifier used for this publication (e.g., Digital Object Identifier (DOI)). text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme + publicationIDNumber ID Number The identifier for the selected ID type. text 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier + publicationURL URL Link to the publication web page (e.g., journal article page, archive record page, or other). Enter full URL, starting with http:// url 32 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE publication citation https://schema.org/distribution + notesText Notes Additional important information about the Dataset. textbox 33 FALSE FALSE FALSE FALSE TRUE FALSE citation + language Language Language of the Dataset text 34 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language + producer Producer Person or organization with the financial or administrative responsibility over this Dataset none 35 FALSE FALSE TRUE FALSE FALSE FALSE citation + producerName Name Producer name FamilyName, GivenName or Organization text 36 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE producer citation + producerAffiliation Affiliation The organization with which the producer is affiliated. 
text 37 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerAbbreviation Abbreviation The abbreviation by which the producer is commonly known. (ex. IQSS, ICPSR) text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL Producer URL points to the producer's web presence, if appropriate. Enter an absolute URL where the producer's web site is found, such as http://www.my.org. Enter full URL, starting with http:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL URL for the producer's logo, which points to this producer's web-accessible logo image. Enter an absolute URL where the producer's logo image is found, such as http://www.my.org/images/logo.gif. Enter full URL for image, starting with http:// url 40
FALSE FALSE FALSE FALSE FALSE FALSE producer citation + productionDate Production Date Date when the data collection or other materials were produced (not distributed, published or archived). YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation + productionPlace Production Place The location where the data collection and any other related materials were produced. text 42 FALSE FALSE FALSE FALSE FALSE FALSE citation + contributor Contributor The organization or person responsible for either collecting, managing, or otherwise contributing in some form to the development of the resource. none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor + contributorType Type The type of contributor of the resource. text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation + contributorName Name The Family Name, Given Name or organization name of the contributor. FamilyName, GivenName or Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation + grantNumber Grant Information Grant Information none 46 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor + grantNumberAgency Grant Agency Grant Number Agency text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + grantNumberValue Grant Number The grant or contract number of the project that sponsored the effort. text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + distributor Distributor The organization designated by the author or producer to generate copies of the particular work including any necessary editions or revisions. none 49 FALSE FALSE TRUE FALSE FALSE FALSE citation + distributorName Name Distributor name FamilyName, GivenName or Organization text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation + distributorAffiliation Affiliation The organization with which the distributor contact is affiliated. 
text 51 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorAbbreviation Abbreviation The abbreviation by which this distributor is commonly known (e.g., IQSS, ICPSR). text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL Distributor URL points to the distributor's web presence, if appropriate. Enter an absolute URL where the distributor's web site is found, such as http://www.my.org. Enter full URL, starting with http:// url 53 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL URL of the distributor's logo, which points to this distributor's web-accessible logo image. Enter an absolute URL where the distributor's logo image is found, such as http://www.my.org/images/logo.gif. Enter full URL for image, starting with http:// url 54
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributionDate Distribution Date Date that the work was made available for distribution/presentation. YYYY-MM-DD date 55 TRUE FALSE FALSE TRUE FALSE FALSE citation + depositor Depositor The person (Family Name, Given Name) or the name of the organization that deposited this Dataset to the repository. text 56 FALSE FALSE FALSE FALSE FALSE FALSE citation + dateOfDeposit Deposit Date Date that the Dataset was deposited into the repository. YYYY-MM-DD date 57 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted + timePeriodCovered Time Period Covered Time period to which the data refer. This item reflects the time period covered by the data, not the dates of coding or making documents machine-readable or the dates the data were collected. Also known as span. none 58 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage + timePeriodCoveredStart Start Start date which reflects the time period covered by the data, not the dates of coding or making documents machine-readable or the dates the data were collected. YYYY-MM-DD date 59 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + timePeriodCoveredEnd End End date which reflects the time period covered by the data, not the dates of coding or making documents machine-readable or the dates the data were collected. YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + dateOfCollection Date of Collection Contains the date(s) when the data were collected. none 61 ; FALSE FALSE TRUE FALSE FALSE FALSE citation + dateOfCollectionStart Start Date when the data collection started. YYYY-MM-DD date 62 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + dateOfCollectionEnd End Date when the data collection ended. 
YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + kindOfData Kind of Data Type of data included in the file: survey data, census/enumeration data, aggregate data, clinical data, event/transaction data, program source code, machine-readable text, administrative records data, experimental data, psychological test, textual data, coded textual, coded documents, time budget diaries, observation data/ratings, process-produced data, or other. text 64 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData + series Series Information about the Dataset series. none 65 : FALSE FALSE FALSE FALSE FALSE FALSE citation + seriesName Name Name of the dataset series to which the Dataset belongs. text 66 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation + seriesInformation Information History of the series and summary of those features that apply to the series as a whole. textbox 67 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation + software Software Information about the software used to generate the Dataset. none 68 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy + softwareName Name Name of software used to generate the Dataset. text 69 #VALUE FALSE TRUE FALSE FALSE FALSE FALSE software citation + softwareVersion Version Version of the software used to generate the Dataset. text 70 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + relatedMaterial Related Material Any material related to this Dataset. textbox 71 FALSE FALSE TRUE FALSE FALSE FALSE citation + relatedDatasets Related Datasets Any Datasets that are related to this Dataset, such as previous research on this subject. textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation + otherReferences Other References Any references that would serve as background or supporting material to this Dataset. 
text 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references + dataSources Data Sources List of books, articles, serials, or machine-readable data files that served as the sources of the data collection. textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom + originOfSources Origin of Sources For historical materials, information about the origin of the sources and the rules followed in establishing the sources should be specified. textbox 75 FALSE FALSE FALSE FALSE FALSE FALSE citation + characteristicOfSources Characteristic of Sources Noted Assessment of characteristics and source material. textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation + accessToSources Documentation and Access to Sources Level of documentation of the original sources. textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation +#controlledVocabulary DatasetField Value identifier displayOrder + subject Agricultural Sciences D01 0 + subject Arts and Humanities D0 1 + subject Astronomy and Astrophysics D1 2 + subject Business and Management D2 3 + subject Chemistry D3 4 + subject Computer and Information Science D7 5 + subject Earth and Environmental Sciences D4 6 + subject Engineering D5 7 + subject Law D8 8 + subject Mathematical Sciences D9 9 + subject Medicine, Health and Life Sciences D6 10 + subject Physics D10 11 + subject Social Sciences D11 12 + subject Other D12 13 + publicationIDType ark 0 + publicationIDType arXiv 1 + publicationIDType bibcode 2 + publicationIDType doi 3 + publicationIDType ean13 4 + publicationIDType eissn 5 + publicationIDType handle 6 + publicationIDType isbn 7 + publicationIDType issn 8 + publicationIDType istc 9 + publicationIDType lissn 10 + publicationIDType lsid 11 + publicationIDType pmid 12 + publicationIDType purl 13 + publicationIDType upc 14 + publicationIDType url 15 + publicationIDType urn 16 + contributorType Data Collector 0 + contributorType Data Curator 1 + contributorType 
Data Manager 2 + contributorType Editor 3 + contributorType Funder 4 + contributorType Hosting Institution 5 + contributorType Project Leader 6 + contributorType Project Manager 7 + contributorType Project Member 8 + contributorType Related Person 9 + contributorType Researcher 10 + contributorType Research Group 11 + contributorType Rights Holder 12 + contributorType Sponsor 13 + contributorType Supervisor 14 + contributorType Work Package Leader 15 + contributorType Other 16 + authorIdentifierScheme ORCID 0 + authorIdentifierScheme ISNI 1 + authorIdentifierScheme LCNA 2 + authorIdentifierScheme VIAF 3 + authorIdentifierScheme GND 4 + authorIdentifierScheme DAI 5 + authorIdentifierScheme ResearcherID 6 + authorIdentifierScheme ScopusID 7 + language Abkhaz 0 + language Afar 1 + language Afrikaans 2 + language Akan 3 + language Albanian 4 + language Amharic 5 + language Arabic 6 + language Aragonese 7 + language Armenian 8 + language Assamese 9 + language Avaric 10 + language Avestan 11 + language Aymara 12 + language Azerbaijani 13 + language Bambara 14 + language Bashkir 15 + language Basque 16 + language Belarusian 17 + language Bengali, Bangla 18 + language Bihari 19 + language Bislama 20 + language Bosnian 21 + language Breton 22 + language Bulgarian 23 + language Burmese 24 + language Catalan,Valencian 25 + language Chamorro 26 + language Chechen 27 + language Chichewa, Chewa, Nyanja 28 + language Chinese 29 + language Chuvash 30 + language Cornish 31 + language Corsican 32 + language Cree 33 + language Croatian 34 + language Czech 35 + language Danish 36 + language Divehi, Dhivehi, Maldivian 37 + language Dutch 38 + language Dzongkha 39 + language English 40 + language Esperanto 41 + language Estonian 42 + language Ewe 43 + language Faroese 44 + language Fijian 45 + language Finnish 46 + language French 47 + language Fula, Fulah, Pulaar, Pular 48 + language Galician 49 + language Georgian 50 + language German 51 + language Greek (modern) 52 + language Guaraní 
53 + language Gujarati 54 + language Haitian, Haitian Creole 55 + language Hausa 56 + language Hebrew (modern) 57 + language Herero 58 + language Hindi 59 + language Hiri Motu 60 + language Hungarian 61 + language Interlingua 62 + language Indonesian 63 + language Interlingue 64 + language Irish 65 + language Igbo 66 + language Inupiaq 67 + language Ido 68 + language Icelandic 69 + language Italian 70 + language Inuktitut 71 + language Japanese 72 + language Javanese 73 + language Kalaallisut, Greenlandic 74 + language Kannada 75 + language Kanuri 76 + language Kashmiri 77 + language Kazakh 78 + language Khmer 79 + language Kikuyu, Gikuyu 80 + language Kinyarwanda 81 + language Kyrgyz 82 + language Komi 83 + language Kongo 84 + language Korean 85 + language Kurdish 86 + language Kwanyama, Kuanyama 87 + language Latin 88 + language Luxembourgish, Letzeburgesch 89 + language Ganda 90 + language Limburgish, Limburgan, Limburger 91 + language Lingala 92 + language Lao 93 + language Lithuanian 94 + language Luba-Katanga 95 + language Latvian 96 + language Manx 97 + language Macedonian 98 + language Malagasy 99 + language Malay 100 + language Malayalam 101 + language Maltese 102 + language Māori 103 + language Marathi (Marāṭhī) 104 + language Marshallese 105 + language Mixtepec Mixtec 106 + language Mongolian 107 + language Nauru 108 + language Navajo, Navaho 109 + language Northern Ndebele 110 + language Nepali 111 + language Ndonga 112 + language Norwegian Bokmål 113 + language Norwegian Nynorsk 114 + language Norwegian 115 + language Nuosu 116 + language Southern Ndebele 117 + language Occitan 118 + language Ojibwe, Ojibwa 119 + language Old Church Slavonic,Church Slavonic,Old Bulgarian 120 + language Oromo 121 + language Oriya 122 + language Ossetian, Ossetic 123 + language Panjabi, Punjabi 124 + language Pāli 125 + language Persian (Farsi) 126 + language Polish 127 + language Pashto, Pushto 128 + language Portuguese 129 + language Quechua 130 + language Romansh 131 
+ language Kirundi 132 + language Romanian 133 + language Russian 134 + language Sanskrit (Saṁskṛta) 135 + language Sardinian 136 + language Sindhi 137 + language Northern Sami 138 + language Samoan 139 + language Sango 140 + language Serbian 141 + language Scottish Gaelic, Gaelic 142 + language Shona 143 + language Sinhala, Sinhalese 144 + language Slovak 145 + language Slovene 146 + language Somali 147 + language Southern Sotho 148 + language Spanish, Castilian 149 + language Sundanese 150 + language Swahili 151 + language Swati 152 + language Swedish 153 + language Tamil 154 + language Telugu 155 + language Tajik 156 + language Thai 157 + language Tigrinya 158 + language Tibetan Standard, Tibetan, Central 159 + language Turkmen 160 + language Tagalog 161 + language Tswana 162 + language Tonga (Tonga Islands) 163 + language Turkish 164 + language Tsonga 165 + language Tatar 166 + language Twi 167 + language Tahitian 168 + language Uyghur, Uighur 169 + language Ukrainian 170 + language Urdu 171 + language Uzbek 172 + language Venda 173 + language Vietnamese 174 + language Volapük 175 + language Walloon 176 + language Welsh 177 + language Wolof 178 + language Western Frisian 179 + language Xhosa 180 + language Yiddish 181 + language Yoruba 182 + language Zhuang, Chuang 183 + language Zulu 184 + language Not applicable 185 diff --git a/src/test/resources/tsv/test.tsv b/src/test/resources/tsv/test.tsv new file mode 100644 index 00000000000..0530f38f9cf --- /dev/null +++ b/src/test/resources/tsv/test.tsv @@ -0,0 +1,9 @@ +#metadataBlock name dataverseAlias displayName + crc990time crc990 CRC 990 Time Metadata +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id + crc990time_when when none 1 FALSE FALSE FALSE FALSE TRUE crc990time + crc990time_standardFormat standardFormat "The format of the dates used in the dataset: 
+Year: YY or YYYY (e.g. 09 or 2009);\nMonth: M or MM (e.g. 3 or 03); Day: D or DD (e.g. 9 or 09); Hour (24h clock): h or hh (e.g. 8 or 08); Minute: m or mm (e.g. 5 or 05); Second: s or ss (e.g. 7 or 07); Separators: [Space] [:] [.] [-] [\] [/]" text 2 FALSE FALSE FALSE FALSE TRUE crc990time_when crc990time + crc990time_startDate startDate Start date of the measurements formatted according to above. date 3 FALSE FALSE FALSE FALSE TRUE crc990time_when crc990time + crc990time_endDate endDate End date of the measurements formatted according to above. date 4 FALSE FALSE FALSE FALSE TRUE crc990time_when crc990time +#controlledVocabulary DatasetField Value identifier displayOrder