-
Notifications
You must be signed in to change notification settings - Fork 477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JSON Schema creator and validator #10109
Changes from 22 commits
61abac1
6ba4ef5
38f09f6
5ca4cc0
02a570a
42e055f
521e8d2
7c630f7
720b3b0
7be5347
c553d1b
a080f84
7d38366
7887a05
437e7cc
d7fccf7
73593ac
33aefff
e4ede35
766c9c3
c82faf9
44a07a3
7d687e9
3bc5ef7
e501845
212baf2
9367026
b7a3e78
2d3f7ab
0a77e2a
7db3629
194945b
c1bd009
c4d9b6e
45df764
d8e327d
866b5ea
e235257
2c41687
547d71c
c9374f3
7697157
e3bff3c
c54a85f
2379828
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes a Dataverse collection alias and returns a custom JSON Schema based on the required fields of the collection. | ||
The second takes a Dataverse collection alias and a dataset JSON file and automatically validates the JSON file against the custom schema for the collection. (Issues 9464 and 9465) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
import edu.harvard.iq.dataverse.search.IndexServiceBean; | ||
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; | ||
import edu.harvard.iq.dataverse.search.SolrSearchResult; | ||
import edu.harvard.iq.dataverse.util.BundleUtil; | ||
import edu.harvard.iq.dataverse.util.StringUtil; | ||
import edu.harvard.iq.dataverse.util.SystemConfig; | ||
import java.io.File; | ||
|
@@ -42,7 +43,13 @@ | |
import jakarta.persistence.NonUniqueResultException; | ||
import jakarta.persistence.PersistenceContext; | ||
import jakarta.persistence.TypedQuery; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.apache.solr.client.solrj.SolrServerException; | ||
import org.everit.json.schema.Schema; | ||
import org.everit.json.schema.ValidationException; | ||
import org.everit.json.schema.loader.SchemaLoader; | ||
import org.json.JSONObject; | ||
import org.json.JSONTokener; | ||
|
||
/** | ||
* | ||
|
@@ -80,6 +87,9 @@ public class DataverseServiceBean implements java.io.Serializable { | |
@EJB | ||
PermissionServiceBean permissionService; | ||
|
||
@EJB | ||
DataverseFieldTypeInputLevelServiceBean dataverseFieldTypeInputLevelService; | ||
|
||
@EJB | ||
SystemConfig systemConfig; | ||
|
||
|
@@ -919,5 +929,228 @@ public List<Object[]> getDatasetTitlesWithinDataverse(Long dataverseId) { | |
return em.createNativeQuery(cqString).getResultList(); | ||
} | ||
|
||
|
||
public String getCollectionDatasetSchema(String dataverseAlias) { | ||
|
||
List<MetadataBlock> selectedBlocks = new ArrayList<>(); | ||
List<DatasetFieldType> requiredDSFT = new ArrayList<>(); | ||
|
||
Dataverse testDV = this.findByAlias(dataverseAlias); | ||
|
||
while (!testDV.isMetadataBlockRoot()) { | ||
if (testDV.getOwner() == null) { | ||
break; // we are at the root; which by defintion is metadata blcok root, regarldess of the value | ||
} | ||
testDV = testDV.getOwner(); | ||
} | ||
|
||
selectedBlocks.addAll(testDV.getMetadataBlocks()); | ||
|
||
for (MetadataBlock mdb : selectedBlocks) { | ||
for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) { | ||
if (!dsft.isChild()) { | ||
DataverseFieldTypeInputLevel dsfIl = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), dsft.getId()); | ||
if (dsfIl != null) { | ||
dsft.setRequiredDV(dsfIl.isRequired()); | ||
dsft.setInclude(dsfIl.isInclude()); | ||
} else { | ||
dsft.setRequiredDV(dsft.isRequired()); | ||
dsft.setInclude(true); | ||
} | ||
if (dsft.isHasChildren()) { | ||
for (DatasetFieldType child : dsft.getChildDatasetFieldTypes()) { | ||
DataverseFieldTypeInputLevel dsfIlChild = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), child.getId()); | ||
if (dsfIlChild != null) { | ||
child.setRequiredDV(dsfIlChild.isRequired()); | ||
child.setInclude(dsfIlChild.isInclude()); | ||
} else { | ||
// in the case of conditionally required (child = true, parent = false) | ||
// we set this to false; i.e this is the default "don't override" value | ||
child.setRequiredDV(child.isRequired() && dsft.isRequired()); | ||
child.setInclude(true); | ||
} | ||
} | ||
} | ||
if(dsft.isRequiredDV()){ | ||
requiredDSFT.add(dsft); | ||
} | ||
} | ||
} | ||
|
||
} | ||
|
||
String reqMDBNames = ""; | ||
List<MetadataBlock> hasReqFields = new ArrayList<>(); | ||
String retval = datasetSchemaPreface; | ||
for (MetadataBlock mdb : selectedBlocks) { | ||
for (DatasetFieldType dsft : requiredDSFT) { | ||
if (dsft.getMetadataBlock().equals(mdb)) { | ||
hasReqFields.add(mdb); | ||
if (!reqMDBNames.isEmpty()) reqMDBNames += ","; | ||
reqMDBNames += "\"" + mdb.getName() + "\""; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
for (MetadataBlock mdb : hasReqFields) { | ||
retval += getCustomMDBSchema(mdb, requiredDSFT); | ||
} | ||
|
||
retval += "\n }"; | ||
|
||
retval += endOfjson.replace("blockNames", reqMDBNames); | ||
|
||
return retval; | ||
|
||
} | ||
|
||
// Builds the schema fragment for a single metadata block. | ||
// mdb          - the block the fragment is generated for | ||
// requiredDSFT - required field types of all blocks; only those whose metadata block | ||
//                equals mdb are used here | ||
// Returns an empty string when none of the required field types belongs to mdb. | ||
// Otherwise returns startOfMDB (with the block name filled in), minItemsTemplate (with | ||
// the number of required fields of this block), one reqValTemplate clause per required | ||
// field, and endOfReqVal. The returned fragment therefore ends with the trailing comma | ||
// that endOfReqVal ends with. | ||
private String getCustomMDBSchema (MetadataBlock mdb, List<DatasetFieldType> requiredDSFT){ | ||
String retval = ""; | ||
boolean mdbHasReqField = false; | ||
// NOTE(review): numReq is incremented below but never read in this method. | ||
int numReq = 0; | ||
List<DatasetFieldType> requiredThisMDB = new ArrayList<>(); | ||
|
||
// Select the required field types that belong to this block. | ||
for (DatasetFieldType dsft : requiredDSFT ){ | ||
|
||
||
if(dsft.getMetadataBlock().equals(mdb)){ | ||
numReq++; | ||
mdbHasReqField = true; | ||
requiredThisMDB.add(dsft); | ||
} | ||
} | ||
if (mdbHasReqField){ | ||
retval += startOfMDB.replace("blockName", mdb.getName()); | ||
|
||
||
retval += minItemsTemplate.replace("numMinItems", Integer.toString(requiredThisMDB.size())); | ||
int count = 0; | ||
for (DatasetFieldType dsft:requiredThisMDB ){ | ||
count++; | ||
String reqValImp = reqValTemplate.replace("reqFieldTypeName", dsft.getName()); | ||
Comment on lines
+1047
to
+1053
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm sort of wondering why we're starting with a string and doing |
||
// reqValTemplate ends with a comma: keep it between clauses, but cut the last | ||
// character off the final clause before the array is closed by endOfReqVal. | ||
if (count < requiredThisMDB.size()){ | ||
retval += reqValImp + "\n"; | ||
} else { | ||
reqValImp = StringUtils.substring(reqValImp, 0, reqValImp.length() - 1); | ||
retval += reqValImp+ "\n"; | ||
retval += endOfReqVal; | ||
} | ||
} | ||
|
||
||
} | ||
|
||
||
return retval; | ||
} | ||
|
||
/** | ||
 * Validates dataset JSON against the schema generated for a collection. | ||
 * <p> | ||
 * Failures are reported through the returned message rather than thrown. Generating and | ||
 * parsing the collection schema happens inside the try block as well, so a problem with | ||
 * the schema itself is reported the same way as any other unexpected error. | ||
 * | ||
 * @param dataverseAlias alias of the collection whose schema is used | ||
 * @param jsonInput the dataset JSON to validate | ||
 * @return the bundle message for success, for a validation failure (followed by the | ||
 *         validator's error message), or for any other exception (followed by its message) | ||
 */ | ||
public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { | ||
    try { | ||
        JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias))); | ||
        Schema schema = SchemaLoader.load(rawSchema); | ||
        schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid | ||
    } catch (ValidationException vx) { | ||
        // Build the message once so the log entry and the returned text cannot drift apart. | ||
        String failureMessage = BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); | ||
        logger.info(failureMessage); | ||
        return failureMessage; | ||
    } catch (Exception ex) { | ||
        String exceptionMessage = BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); | ||
        logger.info(exceptionMessage); | ||
        return exceptionMessage; | ||
    } | ||
    return BundleUtil.getStringFromBundle("dataverses.api.validate.json.succeeded"); | ||
} | ||
|
||
// Fixed opening part of the generated dataset schema. It defines the reusable "field" | ||
// definition under "$defs", the "license" object, and opens | ||
// datasetVersion.properties.metadataBlocks.properties, which is left open so that | ||
// getCollectionDatasetSchema can append one fragment per metadata block. | ||
// NOTE(review): "$schema" declares draft-04, while the templates in this class use | ||
// "$defs", "contains" and "const", which come from later JSON Schema drafts - confirm | ||
// that the validator honours these keywords with this declaration. | ||
private String datasetSchemaPreface = | ||
"{\n" + | ||
" \"$schema\": \"http://json-schema.org/draft-04/schema#\",\n" + | ||
" \"$defs\": {\n" + | ||
" \"field\": {\n" + | ||
" \"type\": \"object\",\n" + | ||
" \"required\": [\"typeClass\", \"multiple\", \"typeName\"],\n" + | ||
Comment on lines
+1110
to
+1115
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could use the fancy new text blocks here: https://docs.oracle.com/en/java/javase/21/text-blocks/index.html |
||
" \"properties\": {\n" + | ||
" \"value\": {\n" + | ||
" \"anyOf\": [\n" + | ||
" {\n" + | ||
" \"type\": \"array\"\n" + | ||
" },\n" + | ||
" {\n" + | ||
" \"type\": \"string\"\n" + | ||
" },\n" + | ||
" {\n" + | ||
" \"$ref\": \"#/$defs/field\"\n" + | ||
" }\n" + | ||
" ]\n" + | ||
" },\n" + | ||
" \"typeClass\": {\n" + | ||
" \"type\": \"string\"\n" + | ||
" },\n" + | ||
" \"multiple\": {\n" + | ||
" \"type\": \"boolean\"\n" + | ||
" },\n" + | ||
" \"typeName\": {\n" + | ||
" \"type\": \"string\"\n" + | ||
" }\n" + | ||
" }\n" + | ||
" }\n" + | ||
"},\n" + | ||
"\"type\": \"object\",\n" + | ||
"\"properties\": {\n" + | ||
" \"datasetVersion\": {\n" + | ||
" \"type\": \"object\",\n" + | ||
" \"properties\": {\n" + | ||
" \"license\": {\n" + | ||
" \"type\": \"object\",\n" + | ||
" \"properties\": {\n" + | ||
" \"name\": {\n" + | ||
" \"type\": \"string\"\n" + | ||
" },\n" + | ||
" \"uri\": {\n" + | ||
" \"type\": \"string\",\n" + | ||
" \"format\": \"uri\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"required\": [\"name\", \"uri\"]\n" + | ||
" },\n" + | ||
" \"metadataBlocks\": {\n" + | ||
" \"type\": \"object\",\n" + | ||
" \"properties\": {\n" + | ||
"" ; | ||
|
||
// Opening part of one metadata block's schema. getCustomMDBSchema replaces the token | ||
// blockName with the block's name. The "fields" object is left open (note the trailing | ||
// comma) so that minItemsTemplate can follow it. | ||
private String startOfMDB = "" + | ||
" \"blockName\": {\n" + | ||
" \"type\": \"object\",\n" + | ||
" \"properties\": {\n" + | ||
" \"fields\": {\n" + | ||
" \"type\": \"array\",\n" + | ||
" \"items\": {\n" + | ||
" \"$ref\": \"#/$defs/field\"\n" + | ||
" },"; | ||
|
||
// One "contains" clause of a block's "allOf" list: it requires an entry in "fields" whose | ||
// "typeName" is a given constant. getCustomMDBSchema replaces the token reqFieldTypeName | ||
// with the required field type's name. The template ends with a comma, which | ||
// getCustomMDBSchema cuts off for the last clause of a block. | ||
private String reqValTemplate = " {\n" + | ||
" \"contains\": {\n" + | ||
" \"properties\": {\n" + | ||
" \"typeName\": {\n" + | ||
" \"const\": \"reqFieldTypeName\"\n" + | ||
" }\n" + | ||
" }\n" + | ||
" }\n" + | ||
" },"; | ||
|
||
// Follows startOfMDB inside the "fields" object: sets "minItems" and opens the "allOf" | ||
// array. getCustomMDBSchema replaces the token numMinItems with the number of required | ||
// field types in the block. | ||
private String minItemsTemplate = "\n \"minItems\": numMinItems,\n" + | ||
" \"allOf\": [\n"; | ||
// Closing part of one metadata block's schema, appended by getCustomMDBSchema after the | ||
// last "contains" clause: closes the "allOf" array, the "fields" object and the block's | ||
// "properties", marks "fields" as required, and closes the block object. | ||
// Ends with a comma, so every block fragment ends with a comma. | ||
private String endOfReqVal = " ]\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"required\": [\"fields\"]\n" + | ||
" },"; | ||
|
||
// Tail of the generated schema, appended last by getCollectionDatasetSchema, which | ||
// replaces the token blockNames with the comma-separated, quoted names of the blocks | ||
// that have required fields. It also marks "metadataBlocks" and "datasetVersion" as | ||
// required and closes the objects that datasetSchemaPreface left open. | ||
private String endOfjson = ",\n" + | ||
" \"required\": [blockNames]\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"required\": [\"metadataBlocks\"]\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"required\": [\"datasetVersion\"]\n" + | ||
"}\n"; | ||
|
||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,7 @@ | |
import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseCommand; | ||
import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseLinkingDataverseCommand; | ||
import edu.harvard.iq.dataverse.engine.command.impl.DeleteExplicitGroupCommand; | ||
import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetSchemaCommand; | ||
import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetRootCommand; | ||
import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseCommand; | ||
import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseStorageSizeCommand; | ||
|
@@ -68,6 +69,7 @@ | |
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseMetadataBlocksCommand; | ||
import edu.harvard.iq.dataverse.engine.command.impl.UpdateExplicitGroupCommand; | ||
import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetsCommand; | ||
import edu.harvard.iq.dataverse.engine.command.impl.ValidateDatasetJsonCommand; | ||
import edu.harvard.iq.dataverse.settings.JvmSettings; | ||
import edu.harvard.iq.dataverse.settings.SettingsServiceBean; | ||
import edu.harvard.iq.dataverse.util.BundleUtil; | ||
|
@@ -126,7 +128,6 @@ | |
import java.util.Optional; | ||
import java.util.stream.Collectors; | ||
import jakarta.servlet.http.HttpServletResponse; | ||
import jakarta.validation.constraints.NotNull; | ||
import jakarta.ws.rs.WebApplicationException; | ||
import jakarta.ws.rs.core.Context; | ||
import jakarta.ws.rs.core.StreamingOutput; | ||
|
@@ -232,6 +233,39 @@ public Response addDataverse(@Context ContainerRequestContext crc, String body, | |
|
||
} | ||
} | ||
|
||
/** | ||
 * Validates the posted dataset JSON for the collection identified in the path. | ||
 * | ||
 * @param crc request context the requesting user is taken from | ||
 * @param body the dataset JSON to validate | ||
 * @param idtf identifier of the collection | ||
 * @return an "ok" response carrying the validation message, or the response wrapped in | ||
 *         the WrappedResponse raised while resolving the collection or running the command | ||
 */ | ||
@POST | ||
@AuthRequired | ||
@Path("{identifier}/validateDatasetJson") | ||
@Consumes("application/json") | ||
public Response validateDatasetJson(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String idtf) { | ||
    final User requestingUser = getRequestUser(crc); | ||
    try { | ||
        final String outcome = execCommand( | ||
                new ValidateDatasetJsonCommand(createDataverseRequest(requestingUser), findDataverseOrDie(idtf), body)); | ||
        return ok(outcome); | ||
    } catch (WrappedResponse wrapped) { | ||
        Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, wrapped); | ||
        return wrapped.getResponse(); | ||
    } | ||
} | ||
|
||
// GET {identifier}/datasetSchema | ||
// Resolves the collection with findDataverseOrDie, runs GetDatasetSchemaCommand for the | ||
// requesting user and returns the resulting schema string through ok(). | ||
// If a WrappedResponse is raised, it is logged at SEVERE level and the response it | ||
// carries is returned instead. | ||
@GET | ||
@AuthRequired | ||
@Path("{identifier}/datasetSchema") | ||
@Produces(MediaType.APPLICATION_JSON) | ||
public Response getDatasetSchema(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf) { | ||
User u = getRequestUser(crc); | ||
|
||
try { | ||
String datasetSchema = execCommand(new GetDatasetSchemaCommand(createDataverseRequest(u), findDataverseOrDie(idtf))); | ||
return ok(datasetSchema); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @sekmiller and were talking abouthow we probably want to return just the JSON (instead of escaped JSON in our normal "ok... data" data structure). Jim did this recently in this commit: 9953 - don't wrap linkset in a data element 3a4d8f9 Otherwise, it looks like this: { There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @sekmiller and I decided to return just the JSON Schema. See 2d3f7ab. This way instead of the \n characters and data...message, you get just what you want:
|
||
} catch (WrappedResponse ex) { | ||
Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex); | ||
return ex.getResponse(); | ||
} | ||
} | ||
|
||
|
||
|
||
@POST | ||
@AuthRequired | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.