Skip to content

Commit

Permalink
Merge 6cb9a4c into 26017b3
Browse files Browse the repository at this point in the history
  • Loading branch information
stevenwinship committed Mar 27, 2024
2 parents 26017b3 + 6cb9a4c commit cf1d769
Show file tree
Hide file tree
Showing 7 changed files with 297 additions and 1 deletion.
5 changes: 5 additions & 0 deletions doc/release-notes/10424-new-api-for-mdc.md
@@ -0,0 +1,5 @@
(Please put at the bottom of the list under 🌐 API)

### Experimental Make Data Count processingState API

An experimental Make Data Count processingState API has been added. For now it has been documented in the developer guide: https://guides.dataverse.org/en/6.2/developers/make-data-count.html#processing-archived-logs
29 changes: 29 additions & 0 deletions doc/sphinx-guides/source/developers/make-data-count.rst
Expand Up @@ -88,6 +88,35 @@ To read more about the Make Data Count api, see https://github.com/datacite/sash

You can compare the MDC metrics display with the Dataverse installation's original by toggling the ``:DisplayMDCMetrics`` setting (true by default to display MDC metrics).

Processing Archived Logs
------------------------

A new script (release date TBD) will be available for processing archived Dataverse log files. Monthly logs that are zipped, TARed, and copied to an archive can be processed by this script running nightly or weekly.

The script will keep track of the state of each tar file as it is processed and will make use of the following "processingState" API endpoints, which allow the state of each file to be checked or modified.

The possible states are new, done, skip, processing, and failed.

Setting the state to "skip" will prevent the file from being processed if the developer needs to analyze the contents.

"failed" files will be re-tried in a later run.

"done" files are successful and will be ignored going forward.

The files currently being processed will have the state "processing".

The script will process the newest set of log files (merging files from multiple nodes) and call Counter Processor.

APIs to manage the states include GET, POST, and DELETE (for testing), as shown below.

Note: ``yearMonth`` must be in the format ``yyyy-mm`` or ``yyyy-mm-dd``.

``curl -X GET http://localhost:8080/api/admin/makeDataCount/{yearMonth}/processingState``

``curl -X POST "http://localhost:8080/api/admin/makeDataCount/{yearMonth}/processingState?state=done"``

``curl -X DELETE http://localhost:8080/api/admin/makeDataCount/{yearMonth}/processingState``

Resources
---------

Expand Down
54 changes: 53 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java
Expand Up @@ -7,6 +7,8 @@
import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean;
import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics;
import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean;
import edu.harvard.iq.dataverse.makedatacount.MakeDataCountProcessState;
import edu.harvard.iq.dataverse.makedatacount.MakeDataCountProcessStateServiceBean;
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider;
Expand All @@ -29,6 +31,8 @@
import jakarta.json.JsonObject;
import jakarta.json.JsonObjectBuilder;
import jakarta.json.JsonValue;
import jakarta.ws.rs.DELETE;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.POST;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.PathParam;
Expand All @@ -47,6 +51,8 @@ public class MakeDataCountApi extends AbstractApiBean {
@EJB
DatasetMetricsServiceBean datasetMetricsService;
@EJB
MakeDataCountProcessStateServiceBean makeDataCountProcessStateService;
@EJB
DatasetExternalCitationsServiceBean datasetExternalCitationsService;
@EJB
DatasetServiceBean datasetService;
Expand Down Expand Up @@ -110,7 +116,7 @@ public Response addUsageMetricsFromSushiReport(@PathParam("id") String id, @Quer

@POST
@Path("/addUsageMetricsFromSushiReport")
public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @QueryParam("reportOnDisk") String reportOnDisk) {
public Response addUsageMetricsFromSushiReportAll(@QueryParam("reportOnDisk") String reportOnDisk) {

try {
JsonObject report = JsonUtil.getJsonObjectFromFile(reportOnDisk);
Expand Down Expand Up @@ -200,5 +206,51 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE
return wr.getResponse();
}
}
/**
 * Returns the Make Data Count processing state recorded for a period.
 *
 * @param yearMonth period identifier taken from the URL path; the service
 *                  rejects anything that is not yyyy-mm or yyyy-mm-dd
 * @return 200 with {@code yearMonth}, {@code state} and
 *         {@code stateChangeTimestamp} when a record exists, 404 when none
 *         exists, 400 when the service rejects {@code yearMonth}
 */
@GET
@Path("{yearMonth}/processingState")
public Response getProcessingState(@PathParam("yearMonth") String yearMonth) {
    MakeDataCountProcessState mdcps;
    try {
        mdcps = makeDataCountProcessStateService.getMakeDataCountProcessState(yearMonth);
    } catch (RuntimeException e) {
        // The service is an EJB, so a validation failure may reach us either
        // directly or wrapped by the container; look through the cause chain.
        Throwable cause = e;
        while (cause != null && !(cause instanceof IllegalArgumentException)) {
            cause = cause.getCause();
        }
        if (cause == null) {
            // Not a validation problem: let it surface as before.
            throw e;
        }
        return error(Status.BAD_REQUEST, cause.getMessage());
    }
    if (mdcps == null) {
        return error(Status.NOT_FOUND, "Could not find an existing process state for " + yearMonth);
    }
    JsonObjectBuilder output = Json.createObjectBuilder();
    output.add("yearMonth", mdcps.getYearMonth());
    output.add("state", mdcps.getState().name());
    output.add("stateChangeTimestamp", mdcps.getStateChangeTime().toString());
    return ok(output);
}

/**
 * Creates or updates the processing state for a period.
 *
 * @param yearMonth period identifier taken from the URL path
 * @param state     new state name taken from the {@code state} query parameter
 * @return 200 with the stored {@code yearMonth}, {@code state} and
 *         {@code stateChangeTimestamp}; 400 carrying the exception message
 *         when the service call fails
 */
@POST
@Path("{yearMonth}/processingState")
public Response updateProcessingState(@PathParam("yearMonth") String yearMonth, @QueryParam("state") String state) {
    final MakeDataCountProcessState stored;
    try {
        stored = makeDataCountProcessStateService.setMakeDataCountProcessState(yearMonth, state);
    } catch (Exception e) {
        // Any failure from the service is reported to the caller as a bad request.
        return badRequest(e.getMessage());
    }

    return ok(Json.createObjectBuilder()
            .add("yearMonth", stored.getYearMonth())
            .add("state", stored.getState().name())
            .add("stateChangeTimestamp", stored.getStateChangeTime().toString()));
}

/**
 * Deletes the processing state recorded for a period.
 *
 * @param yearMonth period identifier taken from the URL path; the service
 *                  rejects anything that is not yyyy-mm or yyyy-mm-dd
 * @return 200 when a record was deleted, 404 when no record exists,
 *         400 when the service rejects {@code yearMonth}
 */
@DELETE
@Path("{yearMonth}/processingState")
public Response deleteProcessingState(@PathParam("yearMonth") String yearMonth) {
    boolean deleted;
    try {
        deleted = makeDataCountProcessStateService.deleteMakeDataCountProcessState(yearMonth);
    } catch (RuntimeException e) {
        // The service validates yearMonth and is an EJB, so the validation
        // failure may arrive directly or wrapped; look through the cause chain.
        Throwable cause = e;
        while (cause != null && !(cause instanceof IllegalArgumentException)) {
            cause = cause.getCause();
        }
        if (cause == null) {
            // Not a validation problem: let it surface as before.
            throw e;
        }
        return badRequest(cause.getMessage());
    }
    if (deleted) {
        return ok("Processing State deleted for " + yearMonth);
    }
    return notFound("Processing State not found for " + yearMonth);
}
}
@@ -0,0 +1,75 @@
package edu.harvard.iq.dataverse.makedatacount;

import jakarta.persistence.*;

import java.io.Serializable;
import java.sql.Timestamp;
import java.time.Instant;
import java.util.Arrays;

/**
 * JPA entity holding the processing state of one period (identified by a
 * yyyy-mm or yyyy-mm-dd string) together with the time the state last changed.
 */
@Entity
@Table(indexes = {@Index(columnList="yearMonth")})
public class MakeDataCountProcessState implements Serializable {

    /**
     * The states a period can be in. Each constant carries a lower-case text
     * form, which is what {@link #fromString(String)} accepts and what
     * {@link #toString()} returns.
     */
    public enum MDCProcessState {
        NEW("new"), DONE("done"), SKIP("skip"), PROCESSING("processing"), FAILED("failed");

        private final String text;

        private MDCProcessState(final String text) {
            this.text = text;
        }

        /**
         * Looks up a state by its text form (exact, case-sensitive match).
         *
         * @param text the text form, e.g. "done"
         * @return the matching state
         * @throws IllegalArgumentException when {@code text} is null or matches no state
         */
        public static MDCProcessState fromString(String text) {
            return Arrays.stream(MDCProcessState.values())
                    .filter(candidate -> candidate.text.equals(text))
                    .findFirst()
                    .orElseThrow(() -> new IllegalArgumentException(
                            "State must be one of these values: " + Arrays.asList(MDCProcessState.values()) + "."));
        }

        @Override
        public String toString() {
            return text;
        }
    }

    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(nullable = false)
    private Long id;

    @Column(nullable = false)
    private String yearMonth;

    // NOTE(review): there is no @Enumerated annotation here, so the provider's
    // default enum mapping presumably applies — confirm that is intended.
    @Column(nullable = false)
    private MDCProcessState state;

    @Column(nullable = true)
    private Timestamp stateChangeTimestamp;

    /** No-argument constructor. */
    public MakeDataCountProcessState() { }

    /**
     * Creates a record for the given period and state.
     *
     * @param yearMonth period in yyyy-mm or yyyy-mm-dd form
     * @param state     text form of the state, e.g. "new"
     * @throws IllegalArgumentException when either argument is invalid
     */
    public MakeDataCountProcessState (String yearMonth, String state) {
        this.setYearMonth(yearMonth);
        this.setState(state);
    }

    /**
     * Sets the period after checking its format.
     *
     * @param yearMonth period in yyyy-mm or yyyy-mm-dd form
     * @throws IllegalArgumentException when the value is null or in neither format
     */
    public void setYearMonth(String yearMonth) throws IllegalArgumentException {
        // Todo: add constraint
        boolean wellFormed = yearMonth != null
                && (yearMonth.matches("\\d{4}-\\d{2}") || yearMonth.matches("\\d{4}-\\d{2}-\\d{2}"));
        if (!wellFormed) {
            throw new IllegalArgumentException("YEAR-MONTH date format must be either yyyy-mm or yyyy-mm-dd");
        }
        this.yearMonth = yearMonth;
    }

    public String getYearMonth() {
        return this.yearMonth;
    }

    /**
     * Sets the state and stamps the change with the current time.
     *
     * @param state the new state
     */
    public void setState(MDCProcessState state) {
        this.state = state;
        this.stateChangeTimestamp = Timestamp.from(Instant.now());
    }

    /**
     * Sets the state from its text form.
     *
     * @param state text form of the state
     * @throws IllegalArgumentException when the text matches no state
     */
    public void setState(String state) throws IllegalArgumentException {
        setState(MDCProcessState.fromString(state));
    }

    public MDCProcessState getState() {
        return this.state;
    }

    /** @return the time of the most recent state change */
    public Timestamp getStateChangeTime() {
        return stateChangeTimestamp;
    }
}
@@ -0,0 +1,61 @@
package edu.harvard.iq.dataverse.makedatacount;

import jakarta.ejb.EJBException;
import jakarta.ejb.Stateless;
import jakarta.inject.Named;
import jakarta.persistence.EntityManager;
import jakarta.persistence.PersistenceContext;
import jakarta.persistence.Query;

import java.util.List;

/**
 * Stateless service for reading, writing and deleting
 * {@link MakeDataCountProcessState} records, keyed by a yyyy-mm or
 * yyyy-mm-dd period string.
 */
@Named
@Stateless
public class MakeDataCountProcessStateServiceBean {

    @PersistenceContext(unitName = "VDCNet-ejbPU")
    protected EntityManager em;

    /**
     * Finds the record for a period.
     *
     * @param yearMonth period in yyyy-mm or yyyy-mm-dd form
     * @return the record, or {@code null} when none exists
     * @throws IllegalArgumentException when {@code yearMonth} is null or malformed
     * @throws EJBException when more than one record exists for the period
     */
    public MakeDataCountProcessState getMakeDataCountProcessState(String yearMonth) {
        validateYearMonth(yearMonth);
        // Bind the value as a named parameter instead of splicing it into the
        // JPQL text, and use a typed query so no raw List or cast is needed.
        List<MakeDataCountProcessState> matches = em
                .createQuery("SELECT d FROM MakeDataCountProcessState d WHERE d.yearMonth = :yearMonth",
                        MakeDataCountProcessState.class)
                .setParameter("yearMonth", yearMonth)
                .getResultList();
        if (matches.size() > 1) {
            throw new EJBException("More than one MakeDataCount Process State record found for YearMonth " + yearMonth + ".");
        }
        return matches.isEmpty() ? null : matches.get(0);
    }

    /**
     * Creates the record for a period, or updates its state when it already exists.
     *
     * @param yearMonth period in yyyy-mm or yyyy-mm-dd form
     * @param state     text form of the new state
     * @return the managed record returned by the merge
     * @throws IllegalArgumentException when either argument is invalid
     */
    public MakeDataCountProcessState setMakeDataCountProcessState(String yearMonth, String state) {
        MakeDataCountProcessState mdcps = getMakeDataCountProcessState(yearMonth);
        if (mdcps == null) {
            mdcps = new MakeDataCountProcessState(yearMonth, state);
        } else {
            mdcps.setState(state);
        }
        return em.merge(mdcps);
    }

    /**
     * Deletes the record for a period.
     *
     * @param yearMonth period in yyyy-mm or yyyy-mm-dd form
     * @return {@code true} when a record was removed, {@code false} when none existed
     * @throws IllegalArgumentException when {@code yearMonth} is null or malformed
     */
    public boolean deleteMakeDataCountProcessState(String yearMonth) {
        MakeDataCountProcessState mdcps = getMakeDataCountProcessState(yearMonth);
        if (mdcps == null) {
            return false;
        }
        em.remove(mdcps);
        em.flush();
        return true;
    }

    /**
     * Checks that the period is in yyyy-mm or yyyy-mm-dd form.
     *
     * @throws IllegalArgumentException when it is null or in neither format
     */
    private void validateYearMonth(String yearMonth) {
        if (yearMonth == null || (!yearMonth.matches("\\d{4}-\\d{2}") && !yearMonth.matches("\\d{4}-\\d{2}-\\d{2}"))) {
            throw new IllegalArgumentException("YEAR-MONTH date format must be either yyyy-mm or yyyy-mm-dd");
        }
    }
}
61 changes: 61 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java
@@ -1,14 +1,21 @@
package edu.harvard.iq.dataverse.api;

import edu.harvard.iq.dataverse.makedatacount.MakeDataCountProcessState;
import io.restassured.path.json.JsonPath;
import io.restassured.RestAssured;
import io.restassured.response.Response;
import java.io.File;
import java.io.IOException;
import static jakarta.ws.rs.core.Response.Status.CREATED;
import static jakarta.ws.rs.core.Response.Status.OK;
import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST;
import static jakarta.ws.rs.core.Response.Status.NOT_FOUND;
import org.apache.commons.io.FileUtils;
import static org.hamcrest.CoreMatchers.anyOf;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;

import org.hamcrest.Matchers;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -171,6 +178,60 @@ public void testMakeDataCountGetMetric() throws IOException {

}

/**
 * Walks one period through the whole processingState API: delete (clean-up),
 * get (expect 404), update to processing, get, update to done, delete.
 */
@Test
public void testGetUpdateDeleteProcessingState() {
    final String period = "2000-01";

    // Clean-up: the record may or may not be left over from an earlier run.
    Response cleanup = UtilIT.makeDataCountDeleteProcessingState(period);
    cleanup.then().assertThat().statusCode(anyOf(equalTo(200), equalTo(404)));

    // Nothing stored yet, so the lookup must report "not found".
    Response lookup = UtilIT.makeDataCountGetProcessingState(period);
    lookup.then().assertThat().statusCode(NOT_FOUND.getStatusCode());

    // First write: create the record in the "processing" state.
    Response firstUpdate = UtilIT.makeDataCountUpdateProcessingState(period, MakeDataCountProcessState.MDCProcessState.PROCESSING.toString());
    firstUpdate.then().assertThat().statusCode(OK.getStatusCode());

    lookup = UtilIT.makeDataCountGetProcessingState(period);
    lookup.then().assertThat().statusCode(OK.getStatusCode());
    JsonPath lookupJson = JsonPath.from(lookup.body().asString());
    lookupJson.prettyPrint();
    String stateAfterFirstUpdate = lookupJson.getString("data.state");
    assertThat(stateAfterFirstUpdate, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.PROCESSING.name()));
    String timestampAfterFirstUpdate = lookupJson.getString("data.stateChangeTimestamp");

    // Second write: move the same record to "done" and read the result from the response.
    Response secondUpdate = UtilIT.makeDataCountUpdateProcessingState(period, MakeDataCountProcessState.MDCProcessState.DONE.toString());
    secondUpdate.then().assertThat().statusCode(OK.getStatusCode());
    JsonPath secondUpdateJson = JsonPath.from(secondUpdate.body().asString());
    secondUpdateJson.prettyPrint();
    String stateAfterSecondUpdate = secondUpdateJson.getString("data.state");
    String timestampAfterSecondUpdate = secondUpdateJson.getString("data.stateChangeTimestamp");
    assertThat(stateAfterSecondUpdate, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.DONE.name()));

    // The change timestamp must have moved forward (compared as strings).
    assertThat(timestampAfterSecondUpdate, Matchers.is(Matchers.greaterThan(timestampAfterFirstUpdate)));

    Response finalDelete = UtilIT.makeDataCountDeleteProcessingState(period);
    finalDelete.then().assertThat().statusCode(OK.getStatusCode());
}

/**
 * Verifies that an unknown state name is rejected with 400, both when no
 * record exists and when one does, and that a rejected update leaves the
 * stored state untouched.
 */
@Test
public void testUpdateProcessingStateWithInvalidState() {
    final String period = "2000-02";

    // Clean-up: the record may or may not be left over from an earlier run.
    Response cleanup = UtilIT.makeDataCountDeleteProcessingState(period);
    cleanup.then().assertThat().statusCode(anyOf(equalTo(200), equalTo(404)));

    // Invalid state while no record exists.
    Response rejectedCreate = UtilIT.makeDataCountUpdateProcessingState(period, "InvalidState");
    rejectedCreate.then().assertThat().statusCode(BAD_REQUEST.getStatusCode());

    // A valid state creates the record.
    Response created = UtilIT.makeDataCountUpdateProcessingState(period, "new");
    created.then().assertThat().statusCode(OK.getStatusCode());

    // Invalid state against the existing record.
    Response rejectedUpdate = UtilIT.makeDataCountUpdateProcessingState(period, "InvalidState");
    rejectedUpdate.then().assertThat().statusCode(BAD_REQUEST.getStatusCode());

    // The stored state must still be the one written by the valid update.
    Response lookup = UtilIT.makeDataCountGetProcessingState(period);
    lookup.then().assertThat().statusCode(OK.getStatusCode());
    String storedState = JsonPath.from(lookup.body().asString()).getString("data.state");
    assertThat(storedState, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.NEW.name()));
}

/**
* Ignore is set on this test because it requires database edits to pass.
* There are currently two citations for doi:10.7910/DVN/HQZOOB but you have
Expand Down
13 changes: 13 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
Expand Up @@ -3135,6 +3135,19 @@ static Response makeDataCountUpdateCitationsForDataset(String idOrPersistentIdOf
return requestSpecification.post("/api/admin/makeDataCount/" + idInPath + "/updateCitationsForDataset"+ optionalQueryParam);
}

/** Issues a GET for the processing state of the given period. */
static Response makeDataCountGetProcessingState(String yearMonth) {
    String path = "/api/admin/makeDataCount/" + yearMonth + "/processingState";
    return given().get(path);
}
/** Issues a POST that sets the processing state of the given period; the state travels in the query string. */
static Response makeDataCountUpdateProcessingState(String yearMonth, String state) {
    String path = "/api/admin/makeDataCount/" + yearMonth + "/processingState?state=" + state;
    return given().post(path);
}
/** Issues a DELETE for the processing state of the given period. */
static Response makeDataCountDeleteProcessingState(String yearMonth) {
    String path = "/api/admin/makeDataCount/" + yearMonth + "/processingState";
    return given().delete(path);
}

static Response editDDI(String body, String fileId, String apiToken) {
if (apiToken == null) {
apiToken = "";
Expand Down

0 comments on commit cf1d769

Please sign in to comment.