Skip to content

Commit

Permalink
#8097 speed up file indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
sekmiller committed Sep 28, 2021
1 parent 44a674a commit 45ccdff
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 14 deletions.
21 changes: 18 additions & 3 deletions src/main/java/edu/harvard/iq/dataverse/FileVersionDifference.java
Expand Up @@ -10,7 +10,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.ResourceBundle;

/**
*
Expand Down Expand Up @@ -50,7 +49,7 @@ public boolean compareMetadata(FileMetadata newFileMetadata, FileMetadata origin
and it updates the FileVersionDifference object which is used to display the differences on the dataset versions tab.
The return value is used by the index service bean to mark whether a file needs to be re-indexed in the context of a dataset update.
When there are changes (after v4.19) to the file metadata data model this method must be updated.
retVal of True means metadatas are equal.
retVal of True means metadatas are equal.
*/

boolean retVal = true;
Expand All @@ -68,13 +67,15 @@ When there are changes (after v4.19)to the file metadata data model this method

if (this.originalFileMetadata == null && this.newFileMetadata.getDataFile() != null ){
//File Added
if (!details) return false;
retVal = false;
updateDifferenceSummary( "", BundleUtil.getStringFromBundle("file.versionDifferences.fileGroupTitle"), 1, 0, 0, 0);
}

//Check to see if File replaced
if (originalFileMetadata != null &&
newFileMetadata.getDataFile() != null && originalFileMetadata.getDataFile() != null &&!this.originalFileMetadata.getDataFile().equals(this.newFileMetadata.getDataFile())){
if (!details) return false;
updateDifferenceSummary( "", BundleUtil.getStringFromBundle("file.versionDifferences.fileGroupTitle"), 0, 0, 0, 1);
retVal = false;
}
Expand All @@ -83,6 +84,8 @@ When there are changes (after v4.19)to the file metadata data model this method
if (!newFileMetadata.getLabel().equals(originalFileMetadata.getLabel())) {
if (details) {
differenceDetailItems.add(new FileDifferenceDetailItem(BundleUtil.getStringFromBundle("file.versionDifferences.fileNameDetailTitle"), originalFileMetadata.getLabel(), newFileMetadata.getLabel()));
} else{
return false;
}
updateDifferenceSummary(BundleUtil.getStringFromBundle("file.versionDifferences.fileMetadataGroupTitle"),
BundleUtil.getStringFromBundle("file.versionDifferences.fileNameDetailTitle"), 0, 1, 0, 0);
Expand All @@ -97,6 +100,8 @@ When there are changes (after v4.19)to the file metadata data model this method
&& !newFileMetadata.getDescription().equals(originalFileMetadata.getDescription())) {
if (details) {
differenceDetailItems.add(new FileDifferenceDetailItem(BundleUtil.getStringFromBundle("file.versionDifferences.descriptionDetailTitle"), originalFileMetadata.getDescription(), newFileMetadata.getDescription()));
} else {
return false;
}
updateDifferenceSummary(BundleUtil.getStringFromBundle("file.versionDifferences.fileMetadataGroupTitle"),
BundleUtil.getStringFromBundle("file.versionDifferences.descriptionDetailTitle"), 0, 1, 0, 0);
Expand All @@ -107,6 +112,8 @@ When there are changes (after v4.19)to the file metadata data model this method
) {
if (details) {
differenceDetailItems.add(new FileDifferenceDetailItem(BundleUtil.getStringFromBundle("file.versionDifferences.descriptionDetailTitle"), "", newFileMetadata.getDescription()));
} else {
return false;
}
updateDifferenceSummary(BundleUtil.getStringFromBundle("file.versionDifferences.fileMetadataGroupTitle"),
BundleUtil.getStringFromBundle("file.versionDifferences.descriptionDetailTitle"), 1, 0, 0, 0);
Expand All @@ -117,6 +124,8 @@ When there are changes (after v4.19)to the file metadata data model this method
) {
if (details) {
differenceDetailItems.add(new FileDifferenceDetailItem(BundleUtil.getStringFromBundle("file.versionDifferences.descriptionDetailTitle"), originalFileMetadata.getDescription(), "" ));
} else {
return false;
}
updateDifferenceSummary(BundleUtil.getStringFromBundle("file.versionDifferences.fileMetadataGroupTitle"),
BundleUtil.getStringFromBundle("file.versionDifferences.descriptionDetailTitle"), 0, 0, 1, 0);
Expand All @@ -130,6 +139,8 @@ When there are changes (after v4.19)to the file metadata data model this method
&& !newFileMetadata.getProvFreeForm().equals(originalFileMetadata.getProvFreeForm())) {
if (details) {
differenceDetailItems.add(new FileDifferenceDetailItem(BundleUtil.getStringFromBundle("file.versionDifferences.provenanceDetailTitle"), originalFileMetadata.getProvFreeForm(), newFileMetadata.getProvFreeForm()));
} else {
return false;
}
updateDifferenceSummary(BundleUtil.getStringFromBundle("file.versionDifferences.fileMetadataGroupTitle"),
BundleUtil.getStringFromBundle("file.versionDifferences.provenanceDetailTitle"), 0, 1, 0, 0);
Expand All @@ -140,6 +151,8 @@ When there are changes (after v4.19)to the file metadata data model this method
) {
if (details) {
differenceDetailItems.add(new FileDifferenceDetailItem(BundleUtil.getStringFromBundle("file.versionDifferences.provenanceDetailTitle"), "", newFileMetadata.getProvFreeForm()));
} else {
return false;
}
updateDifferenceSummary(BundleUtil.getStringFromBundle("file.versionDifferences.fileMetadataGroupTitle"),
BundleUtil.getStringFromBundle("file.versionDifferences.provenanceDetailTitle"), 1, 0, 0, 0);
Expand All @@ -150,6 +163,8 @@ When there are changes (after v4.19)to the file metadata data model this method
) {
if (details) {
differenceDetailItems.add(new FileDifferenceDetailItem(BundleUtil.getStringFromBundle("file.versionDifferences.provenanceDetailTitle"), originalFileMetadata.getProvFreeForm(), "" ));
} else {
return false;
}
updateDifferenceSummary(BundleUtil.getStringFromBundle("file.versionDifferences.fileMetadataGroupTitle"),
BundleUtil.getStringFromBundle("file.versionDifferences.provenanceDetailTitle"), 0, 0, 1, 0);
Expand All @@ -170,7 +185,7 @@ When there are changes (after v4.19)to the file metadata data model this method
}

if (!value1.equals(value2)) {

if (!details) return false;
int added = 0;
int deleted = 0;

Expand Down
40 changes: 29 additions & 11 deletions src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
Expand Up @@ -2,6 +2,7 @@

import edu.harvard.iq.dataverse.ControlledVocabularyValue;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DataFileServiceBean;
import edu.harvard.iq.dataverse.DataFileTag;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetField;
Expand Down Expand Up @@ -117,6 +118,8 @@ public class IndexServiceBean {
SettingsServiceBean settingsService;
@EJB
SolrClientService solrClientService;
@EJB
DataFileServiceBean dataFileService;

@EJB
VariableServiceBean variableService;
Expand Down Expand Up @@ -937,22 +940,35 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
logger.fine(
"We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions.");
}
Date date=java.util.Calendar.getInstance().getTime();
System.out.print("Start file check: " + date );
for (FileMetadata fileMetadata : fileMetadatas) {

boolean indexThisMetadata = true;

if (checkForDuplicateMetadata) {

logger.fine("Checking if this file metadata is a duplicate.");
logger.fine("Checking if this file metadata is a duplicate.");
if (fileMetadata.getDataFile() != null) {
FileMetadata findReleasedFileMetadata = dataFileService.findFileMetadataByDatasetVersionIdAndDataFileId(dataset.getReleasedVersion().getId(), fileMetadata.getDataFile().getId());
if (findReleasedFileMetadata != null) {
if ((fileMetadata.getDataFile().isRestricted() == findReleasedFileMetadata.getDataFile().isRestricted())) {
if (fileMetadata.contentEquals(findReleasedFileMetadata)
&& variableMetadataUtil.compareVariableMetadata(findReleasedFileMetadata, fileMetadata)) {
indexThisMetadata = false;
logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
} else {
logger.fine("This file metadata has changed since the released version; we want to index it!");
}
} else {
logger.fine("This file's restricted status has changed since the released version; we want to index it!");
}
}
}

/*
for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) {
if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) {
/*
* Duplicate if metadata matches and, for full text indexing and the
* SearchFields.ACCESS field, if the restricted status of the file hasn't
* changed. To address the case where full text indexing was on when a file was
* not restricted and it is now restricted and full text indexing has been shut
* off, we need to check for the change in restricted status regardless of
* whether full text indexing is on now.
*/
if ((fileMetadata.getDataFile().isRestricted() == releasedFileMetadata.getDataFile().isRestricted())) {
if (fileMetadata.contentEquals(releasedFileMetadata)
&& variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata)
Expand All @@ -968,6 +984,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
break;
}
}
*/
}
if (indexThisMetadata) {

Expand Down Expand Up @@ -1242,7 +1259,8 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
}
}
}

Date date=java.util.Calendar.getInstance().getTime();
System.out.print("End file check: " + date );
try {
solrClientService.getSolrClient().add(docs);
solrClientService.getSolrClient().commit();
Expand Down

0 comments on commit 45ccdff

Please sign in to comment.