Commit 77ba293

Revert "9686 move harvesting client"

landreev committed Jan 25, 2024
1 parent cea3e62
Showing 12 changed files with 112 additions and 151 deletions.
1 change: 0 additions & 1 deletion doc/release-notes/9686-move-harvesting-client-id.md

This file was deleted.

14 changes: 13 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/Dataset.java
@@ -752,9 +752,21 @@ public void setDatasetExternalCitations(List<DatasetExternalCitations> datasetEx
this.datasetExternalCitations = datasetExternalCitations;
}

@ManyToOne
@JoinColumn(name="harvestingClient_id")
private HarvestingClient harvestedFrom;


public HarvestingClient getHarvestedFrom() {
return this.harvestedFrom;
}

public void setHarvestedFrom(HarvestingClient harvestingClientConfig) {
this.harvestedFrom = harvestingClientConfig;
}

public boolean isHarvested() {
return this.harvestedFrom != null;
}

private String harvestIdentifier;

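Note: this hunk restores the JPA mapping that ties a dataset directly to the client it was harvested from. A minimal usage sketch, assuming an injected EntityManager em and an existing HarvestingClient row (clientId and the surrounding code are illustrative, not part of the commit):

    // Sketch: link a dataset to the client it was harvested from,
    // then branch on it the way display and metrics code does.
    HarvestingClient client = em.find(HarvestingClient.class, clientId);
    Dataset ds = new Dataset();
    ds.setHarvestedFrom(client);   // maps to the dataset.harvestingclient_id column
    if (ds.isHarvested()) {
        // harvested datasets are excluded from the "local" metrics queries below
    }
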
48 changes: 48 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
@@ -583,6 +583,54 @@ public Long getDatasetVersionCardImage(Long versionId, User user) {
return null;
}

/**
* Used to identify and properly display Harvested objects on the dataverse page.
*
* @param datasetIds
* @return
*/
public Map<Long, String> getArchiveDescriptionsForHarvestedDatasets(Set<Long> datasetIds){
if (datasetIds == null || datasetIds.size() < 1) {
return null;
}

String datasetIdStr = StringUtils.join(datasetIds, ", ");

String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" + datasetIdStr + ")";
List<Object[]> searchResults;

try {
searchResults = em.createNativeQuery(qstr).getResultList();
} catch (Exception ex) {
searchResults = null;
}

if (searchResults == null) {
return null;
}

Map<Long, String> ret = new HashMap<>();

for (Object[] result : searchResults) {
Long dsId;
if (result[0] != null) {
try {
dsId = (Long)result[0];
} catch (Exception ex) {
dsId = null;
}
if (dsId == null) {
continue;
}

ret.put(dsId, (String)result[1]);
}
}

return ret;
}



public boolean isDatasetCardImageAvailable(DatasetVersion datasetVersion, User user) {
if (datasetVersion == null) {
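A hypothetical call site for the new helper, along the lines of how the dataverse page labels harvested cards (the ids and the fallback label are assumptions for illustration):

    // Sketch: look up archive descriptions for a batch of harvested dataset ids.
    Set<Long> harvestedIds = new HashSet<>(List.of(12L, 34L));
    Map<Long, String> archiveDescriptions =
            datasetService.getArchiveDescriptionsForHarvestedDatasets(harvestedIds);
    // the method returns null for a null/empty input set or a failed query
    String label = (archiveDescriptions == null)
            ? "Harvested"
            : archiveDescriptions.getOrDefault(12L, "Harvested");
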
17 changes: 0 additions & 17 deletions src/main/java/edu/harvard/iq/dataverse/DvObject.java
@@ -1,7 +1,6 @@
package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.storageuse.StorageQuota;

@@ -372,22 +371,6 @@ public GlobalId getGlobalId() {
return globalId;
}

@ManyToOne
@JoinColumn(name="harvestingClient_id")
private HarvestingClient harvestedFrom;

public HarvestingClient getHarvestedFrom() {
return this.harvestedFrom;
}

public void setHarvestedFrom(HarvestingClient harvestingClientConfig) {
this.harvestedFrom = harvestingClientConfig;
}

public boolean isHarvested() {
return this.harvestedFrom != null;
}

public abstract <T> T accept(Visitor<T> v);

@Override
48 changes: 0 additions & 48 deletions src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java
@@ -383,54 +383,6 @@ public Map<Long, String> getObjectPathsByIds(Set<Long> objectIds){
return ret;
}

/**
* Used to identify and properly display Harvested objects on the dataverse page.
*
* @param dvObjectIds
* @return
*/
public Map<Long, String> getArchiveDescriptionsForHarvestedDvObjects(Set<Long> dvObjectIds){

if (dvObjectIds == null || dvObjectIds.size() < 1) {
return null;
}

String dvObjectIsString = StringUtils.join(dvObjectIds, ", ");
String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, DvObject d WHERE d.harvestingClient_id = h.id AND d.id IN (" + dvObjectIsString + ")";
List<Object[]> searchResults;

try {
searchResults = em.createNativeQuery(qstr).getResultList();
} catch (Exception ex) {
searchResults = null;
}

if (searchResults == null) {
return null;
}

Map<Long, String> ret = new HashMap<>();

for (Object[] result : searchResults) {
Long dvObjId;
if (result[0] != null) {
try {
Integer castResult = (Integer) result[0];
dvObjId = Long.valueOf(castResult);
} catch (Exception ex) {
dvObjId = null;
}
if (dvObjId == null) {
continue;
}
ret.put(dvObjId, (String)result[1]);
}
}

return ret;
}


public String generateNewIdentifierByStoredProcedure() {
StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure");
query.execute();
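Worth noting when comparing this deleted method with its replacement in DatasetServiceBean: this version cast the native-query id column to Integer before widening it to Long, while the restored Dataset variant casts straight to Long. The runtime type of a native-query column varies with the JPA provider and the column definition, so a defensive pattern along these lines is common (a sketch, not code from this commit):

    // Sketch: native-query id columns may surface as Integer, Long, or
    // BigInteger depending on provider and column type; Number covers all three.
    Long dsId = (result[0] instanceof Number)
            ? ((Number) result[0]).longValue()
            : null;
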
@@ -332,11 +332,6 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve

Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId().asString());

//adding the harvesting client id to harvested files #9686
for (DataFile df : ds.getFiles()){
df.setHarvestedFrom(harvestingClient);
}

if (existingDs != null) {
// If this dataset already exists IN ANOTHER DATAVERSE
// we are just going to skip it!
@@ -199,8 +199,8 @@ public void recordHarvestJobStatus(Long hcId, Date finishTime, int harvestedCoun

public Long getNumberOfHarvestedDatasetsByAllClients() {
try {
return (Long) em.createNativeQuery("SELECT count(d.id) FROM dvobject d "
+ " WHERE d.harvestingclient_id IS NOT NULL and d.dtype = 'Dataset'").getSingleResult();
return (Long) em.createNativeQuery("SELECT count(d.id) FROM dataset d "
+ " WHERE d.harvestingclient_id IS NOT NULL").getSingleResult();

} catch (Exception ex) {
logger.info("Warning: exception looking up the total number of harvested datasets: " + ex.getMessage());
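The count moves from the dvobject supertype table back to dataset. Since every row in dataset is a dataset, the dtype discriminator filter becomes unnecessary; a side-by-side sketch of the two statements as plain strings (illustrative only):

    // Before: the column lived on dvobject, which also holds dataverses and
    // files, so the dtype discriminator was needed to count only datasets.
    String before = "SELECT count(d.id) FROM dvobject d"
            + " WHERE d.harvestingclient_id IS NOT NULL AND d.dtype = 'Dataset'";
    // After the revert: dataset rows carry the column directly.
    String after = "SELECT count(d.id) FROM dataset d"
            + " WHERE d.harvestingclient_id IS NOT NULL";
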
@@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat
+ "from datasetversion\n"
+ "where versionstate='RELEASED' \n"
+ (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n")
+ ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "")
+ ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NOT NULL\n " : "")
+ ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "")
+ ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "")
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ")
+ (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : ")\n")
+ "group by dataset_id) as subq group by subq.date order by date;"
@@ -156,11 +156,11 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat
* @param d
*/
public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) {
String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n";
String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n";

if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL
//we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated
String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n";
String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n";
if (DATA_LOCATION_REMOTE.equals(dataLocation)) {
dataLocationLine = harvestBaseLine; // replace
} else if (DATA_LOCATION_ALL.equals(dataLocation)) {
@@ -189,7 +189,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) {
+ "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ "join dvobject on dvobject.id = dataset.id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
+ "where versionstate='RELEASED' \n"
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ")
+ "and \n"
@@ -198,6 +198,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) {
+") sub_temp"
);
logger.log(Level.FINE, "Metric query: {0}", query);

return (long) query.getSingleResult();
}

@@ -206,17 +207,16 @@ public List<Object[]> datasetsBySubjectToMonth(String yyyymm, String dataLocatio
// A published local datasets may have more than one released version!
// So that's why we have to jump through some extra hoops below
// in order to select the latest one:
String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n"
+ "(\n"
+ "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n"
+ " from datasetversion\n"
+ " join dataset on dataset.id = datasetversion.dataset_id\n"
+ " join dvobject on dataset.id = dvobject.id\n"
+ " where versionstate='RELEASED'\n"
+ " and dvobject.harvestingclient_id is null"
+ " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n"
+ " group by dataset_id\n"
+ "))\n";
String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" +
"(\n" +
"select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" +
" from datasetversion\n" +
" join dataset on dataset.id = datasetversion.dataset_id\n" +
" where versionstate='RELEASED'\n" +
" and dataset.harvestingclient_id is null\n" +
" and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" +
" group by dataset_id\n" +
"))\n";

if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL
//we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated
@@ -225,7 +225,7 @@ public List<Object[]> datasetsBySubjectToMonth(String yyyymm, String dataLocatio
// so the query is simpler:
String harvestOriginClause = "(\n" +
" datasetversion.dataset_id = dataset.id\n" +
" AND dvobject.harvestingclient_id IS NOT null \n" +
" AND dataset.harvestingclient_id IS NOT null \n" +
" AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" +
")\n";

@@ -244,7 +244,7 @@ public List<Object[]> datasetsBySubjectToMonth(String yyyymm, String dataLocatio
+ "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n"
+ "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n"
+ "JOIN dataset ON dataset.id = datasetversion.dataset_id\n"
+ "JOIN dvobject ON dvobject.id = dataset.id\n"
+ ((d == null) ? "" : "JOIN dvobject ON dvobject.id = dataset.id\n")
+ "WHERE\n"
+ originClause
+ "AND datasetfieldtype.name = 'subject'\n"
@@ -258,11 +258,11 @@ public List<Object[]> datasetsBySubjectToMonth(String yyyymm, String dataLocatio
}

public long datasetsPastDays(int days, String dataLocation, Dataverse d) {
String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NULL)\n";
String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NULL)\n";

if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL
//we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated
String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NOT NULL)\n";
String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NOT NULL)\n";
if (DATA_LOCATION_REMOTE.equals(dataLocation)) {
dataLocationLine = harvestBaseLine; // replace
} else if (DATA_LOCATION_ALL.equals(dataLocation)) {
@@ -276,7 +276,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) {
+ "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ "join dvobject on dvobject.id = dataset.id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
+ "where versionstate='RELEASED' \n"
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
+ "and \n"
@@ -304,7 +304,7 @@ public JsonArray filesTimeSeries(Dataverse d) {
+ "where datasetversion.id=filemetadata.datasetversion_id\n"
+ "and versionstate='RELEASED' \n"
+ "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n"
+ "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n "
+ "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n "
+ ((d == null) ? ")" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + "))\n ")
+ "group by filemetadata.id) as subq group by subq.date order by date;");
logger.log(Level.FINE, "Metric query: {0}", query);
@@ -327,11 +327,11 @@ public long filesToMonth(String yyyymm, Dataverse d) {
+ "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ "join dvobject on dvobject.id = dataset.id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
+ "where versionstate='RELEASED'\n"
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
+ "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n"
+ "and dvobject.harvestingclient_id is null\n"
+ "and dataset.harvestingclient_id is null\n"
+ "group by dataset_id \n"
+ ");"
);
@@ -350,11 +350,11 @@ public long filesPastDays(int days, Dataverse d) {
+ "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ "join dvobject on dvobject.id = dataset.id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
+ "where versionstate='RELEASED'\n"
+ "and releasetime > current_date - interval '" + days + "' day\n"
+ ((d == null) ? "" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
+ "and dvobject.harvestingclient_id is null\n"
+ "and dataset.harvestingclient_id is null\n"
+ "group by dataset_id \n"
+ ");"
);
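The recurring pattern in this file: after the revert, the only remaining reason to join dvobject is the subtree (owner) filter, so the join is made conditional on a Dataverse having been passed in. A condensed sketch of the pattern, with d and ownerIds standing in for the real parameters:

    // Sketch: join dvobject only when filtering by a Dataverse subtree;
    // harvestingclient_id is read from the dataset table itself.
    String sql = "select count(*) from dataset\n"
            + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
            + "where dataset.harvestingclient_id is null\n"
            + ((d == null) ? "" : "and dvobject.owner_id in (" + ownerIds + ")\n");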
