Skip to content

Commit

Permalink
Make licenses searchable and facetable (#10204)
Browse files Browse the repository at this point in the history
* #9060 #7482 License of dataset and datafile are now indexed, searchable in API and filterable in GUI facets

* #9060 #7482 Fix NullPointer possible case and IndexServiceBeanTest issues

* #9060 #7482 Adding release note

* #9060 #7482 review + added i18n on license facet

---------

Co-authored-by: jeromeroucou <jeromeroucou@users.noreply.github.com>
  • Loading branch information
luddaniel and jeromeroucou committed Feb 29, 2024
1 parent dcd7c22 commit b62c123
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 15 deletions.
2 changes: 2 additions & 0 deletions conf/solr/9.3.0/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@

<!-- incomplete datasets issue 8822 -->
<field name="datasetValid" type="boolean" stored="true" indexed="true" multiValued="false"/>

<field name="license" type="string" stored="true" indexed="true" multiValued="false"/>

<!--
METADATA SCHEMA FIELDS
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
### Search by License

A browse/search facet called "License" has been added and will be displayed as long as there is more than one license in the datasets and datafiles in the browse/search results. This facet allows you to filter by license (for example, CC0).
Also, the Search API now handles license filtering using the `fq` parameter, for example: `/api/search?q=*&fq=license%3A%22CC0+1.0%22` for CC0 1.0. See PR #10204.


Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ private List<String> getSolrFilterQueries(boolean totalCountsOnly){
//fq=publicationStatus:"Unpublished"&fq=publicationStatus:"Draft"

// -----------------------------------------------------------------
// (4) FQ by dataset metadata vlidity
// (4) FQ by dataset metadata validity
// -----------------------------------------------------------------
filterQueries.add(this.filterParams.getSolrFragmentForDatasetValidity());
//fq=datasetValid:(true OR false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
Expand Down Expand Up @@ -784,6 +783,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId().toString());
solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
solrInputDocument.addField(SearchFields.TYPE, "datasets");

boolean valid;
if (!indexableDataset.getDatasetVersion().isDraft()) {
valid = true;
Expand Down Expand Up @@ -855,6 +855,8 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
String parentDatasetTitle = "TBD";
if (datasetVersion != null) {

addLicenseToSolrDoc(solrInputDocument, datasetVersion);

solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
Expand Down Expand Up @@ -1242,6 +1244,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal);

datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
addLicenseToSolrDoc(datafileSolrInputDocument, datasetVersion);

/**
* for rules on sorting files see
Expand Down Expand Up @@ -1611,6 +1614,16 @@ private List<String> getDataversePathsFromSegments(List<String> dataversePathSeg
return subtrees;
}

/**
 * Adds the license name of a dataset version to a Solr document under
 * {@link SearchFields#DATASET_LICENSE}.
 *
 * <p>Nothing is added when the version is {@code null} or carries no terms of
 * use and access. When terms exist but no license is attached to them, the
 * literal name {@code "Custom Terms"} is indexed instead.
 *
 * @param solrInputDocument the Solr document receiving the license field
 * @param datasetVersion    the dataset version whose license is indexed; may be {@code null}
 */
private void addLicenseToSolrDoc(SolrInputDocument solrInputDocument, DatasetVersion datasetVersion) {
    // No version, or no terms attached to it: leave the document untouched.
    if (datasetVersion == null || datasetVersion.getTermsOfUseAndAccess() == null) {
        return;
    }

    // Terms without a license are indexed under the "Custom Terms" label.
    final boolean hasLicense = datasetVersion.getTermsOfUseAndAccess().getLicense() != null;
    final String licenseLabel = hasLicense
            ? datasetVersion.getTermsOfUseAndAccess().getLicense().getName()
            : "Custom Terms";

    solrInputDocument.addField(SearchFields.DATASET_LICENSE, licenseLabel);
}

private void addDataverseReleaseDateToSolrDoc(SolrInputDocument solrInputDocument, Dataverse dataverse) {
if (dataverse.getPublicationDate() != null) {
Calendar calendar = Calendar.getInstance();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,6 @@ more targeted results for just datasets. The format is YYYY (i.e.

public static final String DATASET_VALID = "datasetValid";

public static final String DATASET_LICENSE = "license";

}
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,12 @@ public List<String> getFriendlyNamesFromFilterQuery(String filterQuery) {
friendlyNames.add(friendlyName.get());
return friendlyNames;
}
} else if (key.equals(SearchFields.DATASET_LICENSE)) {
try {
friendlyNames.add(BundleUtil.getStringFromPropertyFile("license." + valueWithoutQuotes.toLowerCase().replace(" ","_") + ".name", "License"));
} catch (Exception e) {
logger.fine(String.format("action=getFriendlyNamesFromFilterQuery cannot find friendlyName for key=%s value=%s", key, value));
}
}

friendlyNames.add(valueWithoutQuotes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ public SolrQueryResponse search(
solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY);
solrQuery.addFacetField(SearchFields.METADATA_SOURCE);
solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR);
solrQuery.addFacetField(SearchFields.DATASET_LICENSE);
/**
* @todo when a new method on datasetFieldService is available
* (retrieveFacetsByDataverse?) only show the facets that the
Expand Down Expand Up @@ -712,10 +713,12 @@ public SolrQueryResponse search(
boolean unpublishedAvailable = false;
boolean deaccessionedAvailable = false;
boolean hideMetadataSourceFacet = true;
boolean hideLicenseFacet = true;
for (FacetField facetField : queryResponse.getFacetFields()) {
FacetCategory facetCategory = new FacetCategory();
List<FacetLabel> facetLabelList = new ArrayList<>();
int numMetadataSources = 0;
int numLicenses = 0;
String metadataBlockName = "";
String datasetFieldName = "";
/**
Expand All @@ -741,23 +744,29 @@ public SolrQueryResponse search(
// logger.info("field: " + facetField.getName() + " " + facetFieldCount.getName() + " (" + facetFieldCount.getCount() + ")");
String localefriendlyName = null;
if (facetFieldCount.getCount() > 0) {
if(metadataBlockName.length() > 0 ) {
localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName);
if(metadataBlockName.length() > 0 ) {
localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName);
} else if (facetField.getName().equals(SearchFields.METADATA_TYPES)) {
Optional<DataverseMetadataBlockFacet> metadataBlockFacet = metadataBlockFacets.stream().filter(blockFacet -> blockFacet.getMetadataBlock().getName().equals(facetFieldCount.getName())).findFirst();
if (metadataBlockFacet.isEmpty()) {
Optional<DataverseMetadataBlockFacet> metadataBlockFacet = metadataBlockFacets.stream().filter(blockFacet -> blockFacet.getMetadataBlock().getName().equals(facetFieldCount.getName())).findFirst();
if (metadataBlockFacet.isEmpty()) {
// metadata block facet is not configured to be displayed => ignore
continue;
}
}

localefriendlyName = metadataBlockFacet.get().getMetadataBlock().getLocaleDisplayFacet();
} else {
try {
localefriendlyName = metadataBlockFacet.get().getMetadataBlock().getLocaleDisplayFacet();
} else if (facetField.getName().equals(SearchFields.DATASET_LICENSE)) {
try {
localefriendlyName = BundleUtil.getStringFromPropertyFile("license." + facetFieldCount.getName().toLowerCase().replace(" ","_") + ".name", "License");
} catch (Exception e) {
localefriendlyName = facetFieldCount.getName();
}
} else {
try {
localefriendlyName = BundleUtil.getStringFromPropertyFile(facetFieldCount.getName(), "Bundle");
} catch (Exception e) {
} catch (Exception e) {
localefriendlyName = facetFieldCount.getName();
}
}
}
}
FacetLabel facetLabel = new FacetLabel(localefriendlyName, facetFieldCount.getCount());
// quote field facets
facetLabel.setFilterQuery(facetField.getName() + ":\"" + facetFieldCount.getName() + "\"");
Expand All @@ -770,15 +779,19 @@ public SolrQueryResponse search(
} else if (facetFieldCount.getName().equals(IndexServiceBean.getDEACCESSIONED_STRING())) {
deaccessionedAvailable = true;
}
}
if (facetField.getName().equals(SearchFields.METADATA_SOURCE)) {
} else if (facetField.getName().equals(SearchFields.METADATA_SOURCE)) {
numMetadataSources++;
} else if (facetField.getName().equals(SearchFields.DATASET_LICENSE)) {
numLicenses++;
}
}
}
if (numMetadataSources > 1) {
hideMetadataSourceFacet = false;
}
if (numLicenses > 1) {
hideLicenseFacet = false;
}
facetCategory.setName(facetField.getName());
// hopefully people will never see the raw facetField.getName() because it may well have an _s at the end
facetCategory.setFriendlyName(facetField.getName());
Expand Down Expand Up @@ -855,6 +868,10 @@ public SolrQueryResponse search(
if (!hideMetadataSourceFacet) {
facetCategoryList.add(facetCategory);
}
} else if (facetCategory.getName().equals(SearchFields.DATASET_LICENSE)) {
if (!hideLicenseFacet) {
facetCategoryList.add(facetCategory);
}
} else {
facetCategoryList.add(facetCategory);
}
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/propertyFiles/License.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Ded
license.cc_by_4.0.description=Creative Commons Attribution 4.0 International License.
license.cc0_1.0.name=CC0 1.0
license.cc_by_4.0.name=CC BY 4.0
license.custom_terms.name=Custom Terms

1 change: 1 addition & 0 deletions src/main/java/propertyFiles/staticSearchFields.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ staticSearchFields.metadata_type_ss=Dataset Feature
staticSearchFields.dvCategory=Dataverse Category
staticSearchFields.metadataSource=Metadata Source
staticSearchFields.publicationDate=Publication Year
staticSearchFields.license=License
staticSearchFields.fileTypeGroupFacet=File Type
staticSearchFields.dvObjectType=Type
staticSearchFields.fileTag=File Tag
Expand Down

0 comments on commit b62c123

Please sign in to comment.