From 0726a75dfbdb5e0e5fb663f5cdcce29e01ee711c Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 10 Jun 2020 11:07:33 -0400 Subject: [PATCH 1/2] #6898 update solr docs for all children of linked dvs --- .../iq/dataverse/search/IndexServiceBean.java | 104 +++++++++++------- 1 file changed, 65 insertions(+), 39 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index b6b7df2887a..1ac1beca201 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -266,25 +266,19 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) dataversePaths.add(dvPath); } //only do this if we're indexing an individual dataverse ie not full re-index + List dataverseChildrenIds = new ArrayList(); + List datasetChildrenIds = new ArrayList(); if (processPaths) { //Get Linking Dataverses to see if I need to reindex my children if (hasAnyLinkingDataverses(dataverse)) { - List found = dataverseService.findByOwnerId(dataverse.getId()); - if (!found.isEmpty()) { - for (Dataverse dv : found) { - //if this dataverse or any of its ancestors is linked and contains dataverses then - // the dataverses must be reindexed to get the new paths added - //We're sticking with the re-index here so that the dataverses datasets will also - //get their paths updated - indexDataverseInNewTransaction(dv); - } + dataverseChildrenIds = dataverseService.findAllDataverseDataverseChildren(dataverse.getId()); + datasetChildrenIds = dataverseService.findAllDataverseDatasetChildren(dataverse.getId()); + for (Long id : datasetChildrenIds) { + updatePathForExistingSolrDocs(datasetService.find(id)); } - List datasets = datasetService.findByOwnerId(dataverse.getId()); - for (Dataset ds : datasets) { - //if this dataverse or any of its ancestors is linked and contains datasets then - // the datasets must get the new paths added - // changed from a full re-index for efficiency wrt issue 6665 - updatePathForExistingSolrDocs(ds); + + for (Long id : dataverseChildrenIds) { + updatePathForExistingSolrDocs(dataverseService.find(id)); } } } @@ -891,7 +885,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d } } - List dataversePaths = retrieveDatasetPaths(dataset); + List dataversePaths = retrieveDVOPaths(dataset); solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths); // solrInputDocument.addField(SearchFields.HOST_DATAVERSE, // dataset.getOwner().getName()); @@ -1424,52 +1418,78 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, object.getId().toString())); QueryResponse res = solrClientService.getSolrClient().query(solrQuery); - - if (!res.getResults().isEmpty()) { + Dataset ds = null; + Dataverse dv = null; + if (!res.getResults().isEmpty()) { SolrDocument doc = res.getResults().get(0); SolrInputDocument sid = new SolrInputDocument(); for (String fieldName : doc.getFieldNames()) { sid.addField(fieldName, doc.getFieldValue(fieldName)); } - Dataset ds = datasetService.find(object.getId()); + List paths = new ArrayList(); + + if(object.isInstanceofDataset()){ + ds = datasetService.find(object.getId()); + paths = retrieveDVOPaths(ds); + } + + if(object.isInstanceofDataverse()){ + dv = dataverseService.find(object.getId()); + paths = retrieveDVOPaths(dv); + } + sid.removeField(SearchFields.SUBTREE); - List paths = retrieveDatasetPaths(ds); sid.addField(SearchFields.SUBTREE, paths); UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); - for (DataFile df : ds.getFiles()) { - solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); - res = solrClientService.getSolrClient().query(solrQuery); - if (!res.getResults().isEmpty()) { - doc = res.getResults().get(0); - sid = new SolrInputDocument(); - for (String fieldName : doc.getFieldNames()) { - sid.addField(fieldName, doc.getFieldValue(fieldName)); + if (ds != null) { + for (DataFile df : ds.getFiles()) { + solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); + res = solrClientService.getSolrClient().query(solrQuery); + if (!res.getResults().isEmpty()) { + doc = res.getResults().get(0); + sid = new SolrInputDocument(); + for (String fieldName : doc.getFieldNames()) { + sid.addField(fieldName, doc.getFieldValue(fieldName)); + } + sid.removeField(SearchFields.SUBTREE); + sid.addField(SearchFields.SUBTREE, paths); + addResponse = solrClientService.getSolrClient().add(sid); + commitResponse = solrClientService.getSolrClient().commit(); } - sid.removeField(SearchFields.SUBTREE); - sid.addField(SearchFields.SUBTREE, paths); - addResponse = solrClientService.getSolrClient().add(sid); - commitResponse = solrClientService.getSolrClient().commit(); } } - } else { - indexDataset((Dataset) object, true); - } + } } - private List retrieveDatasetPaths(Dataset dataset) { + private List retrieveDVOPaths(DvObject dvo) { List dataversePathSegmentsAccumulator = new ArrayList<>(); List dataverseSegments = new ArrayList<>(); + Dataset dataset = null; + Dataverse dv = null; try { - dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator); + if(dvo.isInstanceofDataset()){ + dataset = (Dataset) dvo; + dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator); + } + if(dvo.isInstanceofDataverse()){ + dv = (Dataverse) dvo; + dataverseSegments = findPathSegments(dv, dataversePathSegmentsAccumulator); + } } catch (Exception ex) { logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); } List dataversePaths = getDataversePathsFromSegments(dataverseSegments); // Add Paths for linking dataverses - for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) { + List linkingDataverses = new ArrayList(); + if (dataset != null){ + linkingDataverses = dsLinkingService.findLinkingDataverses(dataset.getId()); + } else{ + linkingDataverses = dvLinkingService.findLinkingDataverses(dv.getId()); + } + for (Dataverse linkingDataverse : linkingDataverses) { List linkingDataversePathSegmentsAccumulator = new ArrayList<>(); List linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator); List linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments); @@ -1479,7 +1499,13 @@ private List retrieveDatasetPaths(Dataset dataset) { } //Add paths for my linking dataverses - List linkingDataversePaths = findLinkingDataversePaths(dataset.getOwner()); + List linkingDataversePaths = new ArrayList(); + if (dataset != null) { + linkingDataversePaths = findLinkingDataversePaths(dataset.getOwner()); + } else { + linkingDataversePaths = findLinkingDataversePaths(dv); + } + for (String dvPath : linkingDataversePaths) { dataversePaths.add(dvPath); } From 8fa1b458075f0670b4ab3de7850c4cc590748ffc Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 10 Jun 2020 17:51:24 -0400 Subject: [PATCH 2/2] #6898 code cleanup --- .../iq/dataverse/search/IndexServiceBean.java | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 1ac1beca201..b46c368e1d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1418,8 +1418,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, object.getId().toString())); QueryResponse res = solrClientService.getSolrClient().query(solrQuery); - Dataset ds = null; - Dataverse dv = null; + if (!res.getResults().isEmpty()) { SolrDocument doc = res.getResults().get(0); SolrInputDocument sid = new SolrInputDocument(); @@ -1427,24 +1426,16 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc for (String fieldName : doc.getFieldNames()) { sid.addField(fieldName, doc.getFieldValue(fieldName)); } - List paths = new ArrayList(); - - if(object.isInstanceofDataset()){ - ds = datasetService.find(object.getId()); - paths = retrieveDVOPaths(ds); - } - - if(object.isInstanceofDataverse()){ - dv = dataverseService.find(object.getId()); - paths = retrieveDVOPaths(dv); - } + List paths = object.isInstanceofDataset() ? retrieveDVOPaths(datasetService.find(object.getId())) + : retrieveDVOPaths(dataverseService.find(object.getId())); + sid.removeField(SearchFields.SUBTREE); sid.addField(SearchFields.SUBTREE, paths); UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); - if (ds != null) { - for (DataFile df : ds.getFiles()) { + if (object.isInstanceofDataset()) { + for (DataFile df : datasetService.find(object.getId()).getFiles()) { solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); res = solrClientService.getSolrClient().query(solrQuery); if (!res.getResults().isEmpty()) {