Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions conf/mdc/counter_weekly.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ case $HTTP_STATUS in
# Successfully queued
# Extract status from the nested data object
STATUS=$(echo "$RESPONSE_BODY" | jq -r '.data.status')

# Extract message from the nested data object
if echo "$RESPONSE_BODY" | jq -e '.data.message' > /dev/null 2>&1 && [ "$(echo "$RESPONSE_BODY" | jq -r '.data.message')" != "null" ]; then
MESSAGE=$(echo "$RESPONSE_BODY" | jq -r '.data.message')
Expand Down Expand Up @@ -89,4 +89,5 @@ done
}

# Call the function on the root dataverse to start processing
processDV 1
processDV 1
echo "Processing Dataverse Complete: $(date)"
80 changes: 62 additions & 18 deletions doc/sphinx-guides/source/_static/util/counter_weekly.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

# A recursive function that processes one Dataverse and then each of its child Dataverses
processDV () {
echo "Running counter_weekly.sh on $(date)"
echo "Processing Dataverse ID#: $1"

# Call the Dataverse API to get the contents of the Dataverse (without credentials, this will only list published datasets and dataverses)
Expand All @@ -18,23 +17,67 @@ for subds in $(echo "${DVCONTENTS}" | jq -r '.data[] | select(.type == "dataset"
#The authority/identifier are preceded by a protocol/host, i.e. https://doi.org/
DOI=`expr "$subds" : '.*:\/\/\doi\.org\/\(.*\)'`

# Call the Dataverse API for this dataset and get the response
RESULT=$(curl -s -X POST "http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI" )
# Parse the status and number of citations found from the response
STATUS=$(echo "$RESULT" | jq -j '.status' )
CITATIONS=$(echo "$RESULT" | jq -j '.data.citationCount')

# The status for a call that worked
OK='OK'

# Check the status and report
if [ "$STATUS" = "$OK" ]; then
echo "Updated: $CITATIONS citations for doi:$DOI"
else
echo "Failed to update citations for doi:$DOI"
echo "Run curl -s -X POST 'http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI ' to retry/see the error message"
fi
#processDV $subds
# Call the Dataverse API for this dataset and capture both the response and HTTP status code
HTTP_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI")

# Extract the HTTP status code from the last line
HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tail -n1)
# Extract the response body (everything except the last line)
RESPONSE_BODY=$(echo "$HTTP_RESPONSE" | sed '$d')

# Check the HTTP status code and report accordingly
case $HTTP_STATUS in
200)
# Successfully queued
# Extract status from the nested data object
STATUS=$(echo "$RESPONSE_BODY" | jq -r '.data.status')

# Extract message from the nested data object
if echo "$RESPONSE_BODY" | jq -e '.data.message' > /dev/null 2>&1 && [ "$(echo "$RESPONSE_BODY" | jq -r '.data.message')" != "null" ]; then
MESSAGE=$(echo "$RESPONSE_BODY" | jq -r '.data.message')
echo "[SUCCESS] doi:$DOI - $STATUS: $MESSAGE"
else
# If message is missing or null, just show the status
echo "[SUCCESS] doi:$DOI - $STATUS: Citation update queued"
fi
;;
400)
# Bad request
if echo "$RESPONSE_BODY" | jq -e '.message' > /dev/null 2>&1; then
ERROR=$(echo "$RESPONSE_BODY" | jq -r '.message')
echo "[ERROR 400] doi:$DOI - Bad request: $ERROR"
else
echo "[ERROR 400] doi:$DOI - Bad request"
fi
;;
404)
# Not found
if echo "$RESPONSE_BODY" | jq -e '.message' > /dev/null 2>&1; then
ERROR=$(echo "$RESPONSE_BODY" | jq -r '.message')
echo "[ERROR 404] doi:$DOI - Not found: $ERROR"
else
echo "[ERROR 404] doi:$DOI - Not found"
fi
;;
503)
# Service unavailable (queue full)
if echo "$RESPONSE_BODY" | jq -e '.message' > /dev/null 2>&1; then
ERROR=$(echo "$RESPONSE_BODY" | jq -r '.message')
echo "[ERROR 503] doi:$DOI - Service unavailable: $ERROR"
elif echo "$RESPONSE_BODY" | jq -e '.data.message' > /dev/null 2>&1; then
ERROR=$(echo "$RESPONSE_BODY" | jq -r '.data.message')
echo "[ERROR 503] doi:$DOI - Service unavailable: $ERROR"
else
echo "[ERROR 503] doi:$DOI - Service unavailable: Queue is full"
fi
;;
*)
# Other error
echo "[ERROR $HTTP_STATUS] doi:$DOI - Unexpected error"
echo "Response: $RESPONSE_BODY"
;;
esac

done

# Now iterate over any child Dataverses and recursively process them
Expand All @@ -47,3 +90,4 @@ done

# Call the function on the root dataverse to start processing
processDV 1
echo "Processing Dataverse Complete: $(date)"
12 changes: 7 additions & 5 deletions src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ public Response updateCitationsForDataset(@PathParam("id") String id) {
final Dataset dataset = findDatasetOrDie(id);
final GlobalId pid = dataset.getGlobalId();
final PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId());
// PathParam 'id' could be ":persistentId" so we want the actual value to be logged
final String datasetId = PERSISTENT_ID_KEY.equals(id) ? getRequestParameter(PERSISTENT_ID_KEY.substring(1)) : id;

// Only supported for DOIs and for DataCite DOI providers
if (!DataCiteDOIProvider.TYPE.equals(pidProvider.getProviderType())) {
Expand All @@ -168,21 +170,21 @@ public Response updateCitationsForDataset(@PathParam("id") String id) {
lastExecutionTime.set(System.currentTimeMillis());

if (success) {
logger.fine("Successfully processed citation update for dataset " + id);
logger.fine("Successfully processed citation update for dataset " + datasetId);
} else {
logger.warning("Failed to process citation update for dataset " + id);
logger.warning("Failed to process citation update for dataset " + datasetId);
}
} catch (Exception e) {
logger.log(Level.SEVERE, "Error processing citation update for dataset " + id, e);
logger.log(Level.SEVERE, "Error processing citation update for dataset " + datasetId, e);
}
});

JsonObjectBuilder output = Json.createObjectBuilder();
output.add("status", "queued");
output.add("message", "Citation update for dataset " + id + " has been queued for processing");
output.add("message", "Citation update for dataset " + datasetId + " has been queued for processing");
return ok(output);
} catch (RejectedExecutionException ree) {
logger.warning("Citation update for dataset " + id + " was rejected: Queue is full");
logger.warning("Citation update for dataset " + datasetId + " was rejected: Queue is full");
return error(Status.SERVICE_UNAVAILABLE,
"Citation update service is currently at capacity. Please try again later.");
}
Expand Down
Loading