semaphore for async indexing and sync index in transaction after publish #10388

Merged Mar 27, 2024 (14 commits)

Changes from 4 commits
3 changes: 3 additions & 0 deletions doc/release-notes/10381-index-after-publish.md
@@ -0,0 +1,3 @@
This release adds a new MicroProfile setting for the maximum number of simultaneously running asynchronous dataset index operations. It defaults to ``4``:

dataverse.solr.concurrency.max-async-indexes
9 changes: 9 additions & 0 deletions doc/sphinx-guides/source/installation/config.rst
@@ -2352,6 +2352,15 @@ when using it to configure your core name!

Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PATH``.

dataverse.solr.concurrency.max-async-indexes
++++++++++++++++++++++++++++++++++++++++++++

Maximum number of simultaneously running asynchronous dataset index operations.

Defaults to ``4``.

Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_CONCURRENCY_MAX_ASYNC_INDEXES``.
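For illustration (not part of this diff), a MicroProfile setting like this can also be read programmatically; the PR itself resolves it through the JvmSettings enum shown later in the diff. A minimal hedged sketch using the standard MicroProfile Config API:

import org.eclipse.microprofile.config.ConfigProvider;

// Sketch: resolve the setting, falling back to 4 when it is not configured.
int maxAsyncIndexes = ConfigProvider.getConfig()
        .getOptionalValue("dataverse.solr.concurrency.max-async-indexes", Integer.class)
        .orElse(4);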

dataverse.rserve.host
+++++++++++++++++++++

@@ -247,6 +247,12 @@ public Dataset execute(CommandContext ctxt) throws CommandException {

logger.info("Successfully published the dataset "+readyDataset.getGlobalId().asString());
readyDataset = ctxt.em().merge(readyDataset);

try {
ctxt.index().indexDataset(readyDataset, true);
} catch (SolrServerException | IOException e) {
throw new CommandException("Indexing failed: " + e.getMessage(), this);
}
Contributor:
I am trying to understand what's going on here. I understand why we may want to change indexing from asynchronous to synchronous. But why are we moving the indexing from onSuccess() back into the execute() method?
Is this just to address the original issue with timestamps (#10381)?
Are we positive we want to do this? If I understand correctly, this would bring us back to a failure to index resulting in a failure to publish, which I thought we specifically wanted to avoid.
I may easily be missing something, of course.

Collaborator (Author):
@landreev @scolapasta I do not fully understand the issue myself. Moving the indexing back to execute() makes the indexing happen earlier, not later, and making it synchronous instead of asynchronous also makes it happen earlier. If the issue is that the index did happen but its timestamp is before the publish moment, this PR would make the problem worse, not better. I just do not see yet why the publish date would be greater than the index moment, unless the index was interrupted (e.g., by a circuit breaker) or the wrong version of the dataset was indexed (e.g., the draft and not the published version). I will go through the code again to see if I am missing something, and whether maybe the wrong version gets indexed, etc. As it turns out, we just migrated to version 6.1 (from 5.14), and a recently published dataset has the same problem. I think the root cause is still not addressed. I will keep digging.

Collaborator (Author):
@landreev @scolapasta I did not see any errors on Solr, no failure log in Payara, etc. Also, the index looks fine, with the right version, etc. I assume that indexing went fine after publish; only the "indextime" was not updated in the DB. I reverted to the async update in finalize after publish, and added em.flush() after the indextime update, which should fix the problem.
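A minimal sketch of that fix, assuming the helper lives in IndexServiceBean with an injected EntityManager; the method and field names here are illustrative, not copied from the PR:

// Illustrative fragment (names assumed): update the dataset's index
// timestamp, then flush, so the UPDATE reaches the database before any
// later merge() can overwrite it with stale state.
private void updateLastIndexedTime(Long datasetId) {
    DvObject dvObject = em.find(DvObject.class, datasetId);
    dvObject.setIndexTime(new java.sql.Timestamp(new java.util.Date().getTime()));
    dvObject = em.merge(dvObject);
    em.flush(); // push the pending UPDATE out of the persistence context now
}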

Collaborator (Author):
I'm not sure if em.flush() changes anything; from what I understand, the change should have been written anyway. I think I am still missing something. In the example on our server, the publishing took 20+ hours due to the checksum checks on 20+ TiB. I got the e-mail that the dataset was published the next day after publishing started. From the code, the index should happen directly after sending the notifications. I would assume that it either succeeded but the indextime did not update, or it failed, but then I should see it in the process-failures log, which is empty.

Collaborator (Author):
Now I think that the index was done and the correct indextime was written, but then it got overwritten by the merge in:

try {
    ExportService instance = ExportService.getInstance();
    instance.exportAllFormats(dataset);
    dataset = ctxt.datasets().merge(dataset);
}

I moved the indexing to the bottom; I am more confident now that it is fixed.
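A sketch of that reordering, under the assumption that the surrounding code looks roughly like the fragment quoted above (the catch clause is elided in that quote, so a generic one is used here):

// Sketch, not the literal PR diff: exports and the final merge run first,
// indexing runs last, so the merge can no longer overwrite the freshly
// written indextime.
try {
    ExportService instance = ExportService.getInstance();
    instance.exportAllFormats(dataset);
    dataset = ctxt.datasets().merge(dataset);
} catch (Exception ex) {
    // assumed handling: a failed export is repairable later
    // and should not block publication
}
ctxt.index().asyncIndexDataset(dataset, true); // now the last writer of indextime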

Collaborator (Author):
It looks to me like it is safe to merge the current code of this PR and leave it like this until we see how it behaves in production. I saved the link to this issue, as it contains some interesting discussion, especially on running batches and metrics. We can revisit these ideas at some point. As for adding the sleep calls, I am against that, since it would make the queue holding indexing jobs longer than necessary. I like the idea of adding the index logic to the timer for fixing exports, but I agree it is out of scope for this PR. We will pick it up later, if needed.
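For reference, a minimal standalone sketch of the throttling pattern this PR introduces: a fair Semaphore capping the number of concurrent asynchronous index jobs (simplified names, not the literal PR code):

import java.util.concurrent.Semaphore;

public class AsyncIndexThrottle {
    // fair = true hands out permits in FIFO order, so queued index jobs
    // start in the order they arrived instead of racing for a free slot
    private static final Semaphore PERMITS = new Semaphore(4, true);

    public static void runThrottled(Runnable indexJob) throws InterruptedException {
        PERMITS.acquire();       // block here until one of the 4 slots is free
        try {
            indexJob.run();
        } finally {
            PERMITS.release();   // released only if acquire() succeeded
        }
    }
}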

Contributor:
I just asked a question in dv-tech Slack about simplifying the whole scheme somewhat radically (but probably not in the context of this PR either).
I'm generally in favor of merging this PR as is (I just want to run a couple more experiments with multiple indexing attempts in parallel).
I would, however, consider including one small extra change: adding a few extra hours to the index timestamp in the test in the isIndexedVersion() method in DatasetPage.java:

return (workingVersion.getDataset().getIndexTime() != null)
        && workingVersion.getDataset().getIndexTime().after(workingVersion.getReleaseTime());

i.e., something like

(dataset.getIndexTime().getTime() + (a few hours worth of milliseconds)) > version.getReleaseTime().getTime()

instead?

Contributor:
I have looked at the differences, but nothing struck me as suspicious.

In solrconfig.xml some request handlers are disabled, but I do not think those were used anyway: the /browse, /spell, /update/extract, /tvhr and /elevate endpoints.

In schema.xml I did find it strange that the schema version in 6.1 is lower than that of 5.14, but I do not know what the impact of that version number is; it could be just informative. The other differences replace the now obsolete solr.TrieXXXField types with solr.XXXPointField, as one would expect for Solr 9. There are also some changes to the dynamic field definitions, but AFAIK Dataverse does not use dynamic field types in Solr.

Contributor:
Could it be the move from Solr 8 to 9 itself that changed something dramatically?
From a brief look, I'm not seeing any information online suggesting 9 is dramatically slower than 8 in general... but it could be something Dataverse specifically relies on when indexing that behaves differently.

Collaborator (Author):
I moved the index time in the isIndexedVersion() test by 3 hours, as suggested. This should fix the problem for already published datasets with an indextime before the releasetime.
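A hedged sketch of what that adjusted check could look like; the constant name, method signature, and exact arithmetic are illustrative, and only the 3-hour window comes from the comment above:

// Illustrative: treat the version as indexed when the recorded index time
// plus a 3-hour grace window lands after the release time.
private static final long INDEX_GRACE_MS = 3L * 60 * 60 * 1000; // 3 hours

private boolean isIndexedVersion(DatasetVersion workingVersion) {
    java.sql.Timestamp indexTime = workingVersion.getDataset().getIndexTime();
    return indexTime != null
            && indexTime.getTime() + INDEX_GRACE_MS
               > workingVersion.getReleaseTime().getTime();
}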


return readyDataset;
}
@@ -267,7 +273,6 @@ public boolean onSuccess(CommandContext ctxt, Object r) {
} catch (Exception e) {
logger.warning("Failure to send dataset published messages for : " + dataset.getId() + " : " + e.getMessage());
}
ctxt.index().asyncIndexDataset(dataset, true);

//re-indexing dataverses that have additional subjects
if (!dataversesToIndex.isEmpty()){
@@ -35,6 +35,7 @@
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Future;
import java.util.concurrent.Semaphore;
import java.util.function.Function;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -341,6 +342,8 @@ public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) {
private static final Map<Long, Dataset> NEXT_TO_INDEX = new ConcurrentHashMap<>();
// indexingNow is a set of dataset ids of datasets being indexed asynchronously right now
private static final Map<Long, Boolean> INDEXING_NOW = new ConcurrentHashMap<>();
// semaphore for async indexing
private static final Semaphore ASYNC_INDEX_SEMAPHORE = new Semaphore(JvmSettings.MAX_ASYNC_INDEXES.lookupOptional(Integer.class).orElse(4), true);

// When you pass null as Dataset parameter to this method, it indicates that the indexing of the dataset with "id" has finished
// Pass non-null Dataset to schedule it for indexing
@@ -385,6 +388,19 @@ synchronized private static Dataset getNextToIndex(Long id, Dataset d) {
*/
@Asynchronous
public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) {
try {
ASYNC_INDEX_SEMAPHORE.acquire();
doAyncIndexDataset(dataset, doNormalSolrDocCleanUp);
} catch (InterruptedException e) {
String failureLogText = "Indexing failed: interrupted. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString();
failureLogText += "\r\n" + e.getLocalizedMessage();
LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset);
} finally {
ASYNC_INDEX_SEMAPHORE.release();
}
}

private void doAyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) {
Long id = dataset.getId();
Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes)
while (next != null) {
@@ -402,7 +418,16 @@ public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) {
@Asynchronous
public void asyncIndexDatasetList(List<Dataset> datasets, boolean doNormalSolrDocCleanUp) {
for(Dataset dataset : datasets) {
asyncIndexDataset(dataset, true);
try {
ASYNC_INDEX_SEMAPHORE.acquire();
doAyncIndexDataset(dataset, true);
} catch (InterruptedException e) {
String failureLogText = "Indexing failed: interrupted. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString();
failureLogText += "\r\n" + e.getLocalizedMessage();
LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset);
} finally {
ASYNC_INDEX_SEMAPHORE.release();
}
}
}

@@ -414,7 +439,7 @@ public void indexDvObject(DvObject objectIn) throws SolrServerException, IOException {
}
}

private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException {
public void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException {
doIndexDataset(dataset, doNormalSolrDocCleanUp);
updateLastIndexedTime(dataset.getId());
}
@@ -60,6 +60,10 @@ public enum JvmSettings {
SOLR_CORE(SCOPE_SOLR, "core"),
SOLR_PATH(SCOPE_SOLR, "path"),

// INDEX CONCURRENCY
SCOPE_SOLR_CONCURENCY(SCOPE_SOLR, "concurrency"),
MAX_ASYNC_INDEXES(SCOPE_SOLR_CONCURENCY, "max-async-indexes"),

// RSERVE CONNECTION
SCOPE_RSERVE(PREFIX, "rserve"),
RSERVE_HOST(SCOPE_RSERVE, "host"),