diff --git a/conf/mdc/counter_weekly.sh b/conf/mdc/counter_weekly.sh index 67cb5df2af2..79f8c534a33 100644 --- a/conf/mdc/counter_weekly.sh +++ b/conf/mdc/counter_weekly.sh @@ -31,7 +31,7 @@ case $HTTP_STATUS in # Successfully queued # Extract status from the nested data object STATUS=$(echo "$RESPONSE_BODY" | jq -r '.data.status') - + # Extract message from the nested data object if echo "$RESPONSE_BODY" | jq -e '.data.message' > /dev/null 2>&1 && [ "$(echo "$RESPONSE_BODY" | jq -r '.data.message')" != "null" ]; then MESSAGE=$(echo "$RESPONSE_BODY" | jq -r '.data.message') @@ -89,4 +89,5 @@ done } # Call the function on the root dataverse to start processing -processDV 1 \ No newline at end of file +processDV 1 +echo "Processing Dataverse Complete: $(date)" diff --git a/doc/sphinx-guides/source/_static/util/counter_weekly.sh b/doc/sphinx-guides/source/_static/util/counter_weekly.sh index 7cc9e31d86e..c487bfb35aa 100644 --- a/doc/sphinx-guides/source/_static/util/counter_weekly.sh +++ b/doc/sphinx-guides/source/_static/util/counter_weekly.sh @@ -6,7 +6,6 @@ # A recursive method to process each Dataverse processDV () { -echo "Running counter_weekly.sh on $(date)" echo "Processing Dataverse ID#: $1" #Call the Dataverse API to get the contents of the Dataverse (without credentials, this will only list published datasets and dataverses @@ -18,23 +17,67 @@ for subds in $(echo "${DVCONTENTS}" | jq -r '.data[] | select(.type == "dataset" #The authority/identifier are preceded by a protocol/host, i.e. https://doi.org/ DOI=`expr "$subds" : '.*:\/\/\doi\.org\/\(.*\)'` -# Call the Dataverse API for this dataset and get the response -RESULT=$(curl -s -X POST "http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI" ) -# Parse the status and number of citations found from the response -STATUS=$(echo "$RESULT" | jq -j '.status' ) -CITATIONS=$(echo "$RESULT" | jq -j '.data.citationCount') - -# The status for a call that worked -OK='OK' - -# Check the status and report -if [ "$STATUS" = "$OK" ]; then - echo "Updated: $CITATIONS citations for doi:$DOI" -else - echo "Failed to update citations for doi:$DOI" - echo "Run curl -s -X POST 'http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI ' to retry/see the error message" -fi -#processDV $subds +# Call the Dataverse API for this dataset and capture both the response and HTTP status code +HTTP_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI") + +# Extract the HTTP status code from the last line +HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tail -n1) +# Extract the response body (everything except the last line) +RESPONSE_BODY=$(echo "$HTTP_RESPONSE" | sed '$d') + +# Check the HTTP status code and report accordingly +case $HTTP_STATUS in + 200) + # Successfully queued + # Extract status from the nested data object + STATUS=$(echo "$RESPONSE_BODY" | jq -r '.data.status') + + # Extract message from the nested data object + if echo "$RESPONSE_BODY" | jq -e '.data.message' > /dev/null 2>&1 && [ "$(echo "$RESPONSE_BODY" | jq -r '.data.message')" != "null" ]; then + MESSAGE=$(echo "$RESPONSE_BODY" | jq -r '.data.message') + echo "[SUCCESS] doi:$DOI - $STATUS: $MESSAGE" + else + # If message is missing or null, just show the status + echo "[SUCCESS] doi:$DOI - $STATUS: Citation update queued" + fi + ;; + 400) + # Bad request + if echo "$RESPONSE_BODY" | jq -e '.message' > /dev/null 2>&1; then + ERROR=$(echo "$RESPONSE_BODY" | jq -r '.message') + echo "[ERROR 400] doi:$DOI - Bad request: $ERROR" + else + echo "[ERROR 400] doi:$DOI - Bad request" + fi + ;; + 404) + # Not found + if echo "$RESPONSE_BODY" | jq -e '.message' > /dev/null 2>&1; then + ERROR=$(echo "$RESPONSE_BODY" | jq -r '.message') + echo "[ERROR 404] doi:$DOI - Not found: $ERROR" + else + echo "[ERROR 404] doi:$DOI - Not found" + fi + ;; + 503) + # Service unavailable (queue full) + if echo "$RESPONSE_BODY" | jq -e '.message' > /dev/null 2>&1; then + ERROR=$(echo "$RESPONSE_BODY" | jq -r '.message') + echo "[ERROR 503] doi:$DOI - Service unavailable: $ERROR" + elif echo "$RESPONSE_BODY" | jq -e '.data.message' > /dev/null 2>&1; then + ERROR=$(echo "$RESPONSE_BODY" | jq -r '.data.message') + echo "[ERROR 503] doi:$DOI - Service unavailable: $ERROR" + else + echo "[ERROR 503] doi:$DOI - Service unavailable: Queue is full" + fi + ;; + *) + # Other error + echo "[ERROR $HTTP_STATUS] doi:$DOI - Unexpected error" + echo "Response: $RESPONSE_BODY" + ;; +esac + done # Now iterate over any child Dataverses and recursively process them @@ -47,3 +90,4 @@ done # Call the function on the root dataverse to start processing processDV 1 +echo "Processing Dataverse Complete: $(date)" diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java index 892352e2794..de8a5b6523d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java @@ -147,6 +147,8 @@ public Response updateCitationsForDataset(@PathParam("id") String id) { final Dataset dataset = findDatasetOrDie(id); final GlobalId pid = dataset.getGlobalId(); final PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + // PathParam 'id' could be ":persistentId" so we want the actual value to be logged + final String datasetId = PERSISTENT_ID_KEY.equals(id) ? getRequestParameter(PERSISTENT_ID_KEY.substring(1)) : id; // Only supported for DOIs and for DataCite DOI providers if (!DataCiteDOIProvider.TYPE.equals(pidProvider.getProviderType())) { @@ -168,21 +170,21 @@ public Response updateCitationsForDataset(@PathParam("id") String id) { lastExecutionTime.set(System.currentTimeMillis()); if (success) { - logger.fine("Successfully processed citation update for dataset " + id); + logger.fine("Successfully processed citation update for dataset " + datasetId); } else { - logger.warning("Failed to process citation update for dataset " + id); + logger.warning("Failed to process citation update for dataset " + datasetId); } } catch (Exception e) { - logger.log(Level.SEVERE, "Error processing citation update for dataset " + id, e); + logger.log(Level.SEVERE, "Error processing citation update for dataset " + datasetId, e); } }); JsonObjectBuilder output = Json.createObjectBuilder(); output.add("status", "queued"); - output.add("message", "Citation update for dataset " + id + " has been queued for processing"); + output.add("message", "Citation update for dataset " + datasetId + " has been queued for processing"); return ok(output); } catch (RejectedExecutionException ree) { - logger.warning("Citation update for dataset " + id + " was rejected: Queue is full"); + logger.warning("Citation update for dataset " + datasetId + " was rejected: Queue is full"); return error(Status.SERVICE_UNAVAILABLE, "Citation update service is currently at capacity. Please try again later."); }