Skip to content

Commit

Permalink
Merge pull request #1738 from NCEAS/feature-1716-mn-reindex
Browse files Browse the repository at this point in the history
Feature 1716-mn-reindex
  • Loading branch information
taojing2002 committed Nov 29, 2023
2 parents 301be34 + c74c522 commit 506336b
Show file tree
Hide file tree
Showing 6 changed files with 463 additions and 377 deletions.
319 changes: 0 additions & 319 deletions src/edu/ucsb/nceas/metacat/MetacatHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,12 @@
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.List;
import java.util.Timer;
import java.util.Vector;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
Expand All @@ -31,15 +28,13 @@

import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;

import edu.ucsb.nceas.metacat.common.resourcemap.ResourceMapNamespaces;
import edu.ucsb.nceas.metacat.database.DBConnection;
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
import edu.ucsb.nceas.metacat.dataone.D1NodeService;
import edu.ucsb.nceas.metacat.dataone.SystemMetadataFactory;
import edu.ucsb.nceas.metacat.event.MetacatDocumentEvent;
import edu.ucsb.nceas.metacat.event.MetacatEventService;
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
import edu.ucsb.nceas.metacat.index.queue.IndexGenerator;
import edu.ucsb.nceas.metacat.properties.PropertyService;
import edu.ucsb.nceas.metacat.service.XMLSchemaService;
import edu.ucsb.nceas.metacat.shared.HandlerException;
Expand Down Expand Up @@ -954,320 +949,6 @@ protected void handleBuildIndexAction(
sendNotSupportMessage(response);
}

/**
 * Queue one or more objects for reindexing. The objects are named by the "pid" request
 * parameter, which may carry several values. If the parameter is missing, an error message
 * is written to the response instead.
 * <p>
 * The caller must either have write permission on an object or be a Metacat administrator.
 * The first pid gates the whole request: if the caller may not reindex it, nothing is queued
 * and a single error is returned. Every further pid is checked individually; a pid the
 * caller may not reindex is reported in the failed list and the remaining pids are still
 * processed.
 * <p>
 * The response is an XML document written with content type "text/xml". Being listed under
 * "success" only means the object was handed to the index queue, not that indexing finished.
 *
 * @param params the parameters from the web request; the "pid" entry lists the objects
 * @param request the http request object (not used by this method)
 * @param response the http response object for writing output
 * @param username the username of the authenticated user
 * @param groups the groups the authenticated user belongs to
 */
protected void handleReindexAction(
    Hashtable<String, String[]> params, HttpServletRequest request,
    HttpServletResponse response, String username, String[] groups) {

    String[] pid = params.get("pid");
    PrintWriter out = null;
    // Everything destined for the client is collected here and written exactly once in the
    // finally block, so every exit path (including the early returns) produces a response.
    StringBuilder results = new StringBuilder();
    try {
        response.setContentType("text/xml");
        out = response.getWriter();

        if (pid == null || pid.length == 0) {
            results.append("<error>");
            results.append("The parameter - pid is missing. Please check your parameter list.");
            results.append("</error>");
            return;
        }

        if (!isAuthorizedToReindex(username, groups, pid[0])) {
            results.append("<error>");
            results.append("The user \"").append(escapeXmlText(username));
            results.append("\" is not authorized for this action.");
            results.append("</error>");
            return;
        }

        List<String> successList = new ArrayList<String>();
        List<String> failedList = new ArrayList<String>();

        for (int i = 0; i < pid.length; i++) {
            String id = pid[i];

            // pid[0] was authorized above; every other pid needs its own check, otherwise
            // one writable pid would unlock reindexing of arbitrary objects.
            if (i > 0) {
                boolean allowed;
                try {
                    allowed = isAuthorizedToReindex(username, groups, id);
                } catch (Exception e) {
                    logMetacat.warn("could not check the reindex permission for pid " + id, e);
                    allowed = false;
                }
                if (!allowed) {
                    failedList.add(id);
                    logMetacat.warn("the user " + username
                                        + " is not authorized to reindex pid " + id);
                    continue;
                }
            }

            logMetacat.info("queueing doc index for pid " + id);
            Identifier identifier = new Identifier();
            identifier.setValue(id);
            SystemMetadata sysMeta = SystemMetadataManager.getInstance().get(identifier);
            if (sysMeta == null) {
                failedList.add(id);
                logMetacat.info("no system metadata was found for pid " + id);
                continue;
            }
            try {
                // submit for indexing
                MetacatSolrIndex.getInstance().submit(identifier, sysMeta, false);
            } catch (Exception e) {
                failedList.add(id);
                logMetacat.warn("Error submitting to index for pid " + id, e);
                continue;
            }
            successList.add(id);
            logMetacat.info("done queueing doc index for pid " + id);
        }

        results.append("<results>\n");
        if (!successList.isEmpty()) {
            results.append("<success>\n");
            for (String id : successList) {
                results.append("<pid>").append(escapeXmlText(id)).append("</pid>\n");
            }
            results.append(
                "<note>The object(s) was/were submitted to the index queue successfully. "
                    + "However, this doesn't mean they were indexed successfully.</note>");
            results.append("</success>");
        }

        if (!failedList.isEmpty()) {
            results.append("<error>\n");
            for (String id : failedList) {
                results.append("<pid>").append(escapeXmlText(id)).append("</pid>\n");
            }
            results.append(
                "<note>The object(s) couldn't be submitted to the index queue.</note>");
            results.append("</error>");
        }
        results.append("</results>\n");

    } catch (Exception e) {
        // Passing the exception to the logger keeps the stack trace in the Metacat log.
        logMetacat.error("MetacatHandler.handleReindex action - " + e.getMessage(), e);
        results.append("<error>");
        results.append("There was an error - ").append(escapeXmlText(e.getMessage()));
        results.append("</error>");
    } finally {
        if (out != null) {
            out.print(results.toString());
            out.close();
        }
    }
}

/**
 * Decide whether a user may reindex a single object: the user needs write permission on the
 * object's local document id, or must be an administrator.
 *
 * @param username the username of the authenticated user
 * @param groups the groups the authenticated user belongs to
 * @param pid the identifier of the object to check
 * @return true if the user may reindex the object
 * @throws Exception if the local id cannot be looked up or a permission check fails; declared
 *                   broadly because it simply propagates whatever the underlying lookups throw
 */
private static boolean isAuthorizedToReindex(String username, String[] groups, String pid)
    throws Exception {
    String docid = IdentifierManager.getInstance().getLocalId(pid);
    if (DocumentImpl.hasWritePermission(username, groups, docid)) {
        return true;
    }
    return AuthUtil.isAdministrator(username, groups);
}

/**
 * Escape the characters that have a special meaning in XML so that a value can be embedded
 * in the response as character data. A null value is rendered as the text "null".
 *
 * @param text the raw value
 * @return the value with the characters &amp;, &lt;, &gt;, " and ' replaced by entities
 */
private static String escapeXmlText(String text) {
    String value = String.valueOf(text);
    StringBuilder escaped = new StringBuilder(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
        char c = value.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&apos;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}


/**
 * Rebuild the index for all documents in the systemMetadata table. Only administrators may
 * trigger this action.
 * <p>
 * The work is done on a background thread: objects that are not resource maps are queued
 * first, then the resource maps. The response is written as soon as the thread has been
 * started, so a "success" answer only means the request was accepted.
 *
 * @param params the parameters from the web request (not used by this method)
 * @param request the http request object (not used by this method)
 * @param response the http response object for writing output
 * @param username the username of the authenticated user
 * @param groups the groups the authenticated user belongs to
 */
protected void handleReindexAllAction(
    Hashtable<String, String[]> params, HttpServletRequest request,
    HttpServletResponse response, String username, String[] groups) {

    PrintWriter out = null;
    // The response text is collected here and written exactly once in the finally block.
    StringBuilder results = new StringBuilder();
    try {
        response.setContentType("text/xml");
        out = response.getWriter();

        // Check that the user is authenticated as an administrator account
        if (!AuthUtil.isAdministrator(username, groups)) {
            results.append("<error>");
            results.append("The user \"" + username + "\" is not authorized for this action.");
            results.append("</error>");
            return;
        }

        logMetacat.info("queueing doc index for all documents");
        try {
            Runnable indexAll = new Runnable() {
                @Override
                public void run() {
                    try {
                        List<String> resourceMapFormats = ResourceMapNamespaces.getNamespaces();
                        buildAllNonResourceMapIndex(resourceMapFormats);
                        buildAllResourceMapIndex(resourceMapFormats);
                    } catch (RuntimeException e) {
                        // Nobody waits on this thread, so the log is the only place a
                        // failure can be reported.
                        logMetacat.error("MetacatHandler.handleReindexAllAction - "
                                             + "the background reindex failed: "
                                             + e.getMessage(), e);
                    }
                }
            };
            Thread thread = new Thread(indexAll);
            thread.start();
            results.append("<success>");
            results.append(
                "The indexall action was accepted by the Metacat and it is working on the "
                    + "background right now. It doesn't guarantee all objects will be "
                    + "reindexed successfully. You may monitor the process through the "
                    + "Metacat log file.");
            results.append("</success>");
            logMetacat.info("started the background thread queueing index for all documents");
        } catch (Exception e) {
            logMetacat.error("MetacatHandler.handleReindexAllAction - "
                                 + "Could not start the reindex thread: " + e.getMessage(), e);
            // report the error instead of any partially built success message
            results.setLength(0);
            results.append("<error>");
            results.append(e.getMessage());
            results.append("</error>");
        }

    } catch (IOException e) {
        // Without a writer there is no way to tell the client; the log is all that is left.
        logMetacat.error("MetacatHandler.handleReindexAllAction - "
                             + "Could not open http response for writing: " + e.getMessage(), e);
    } catch (MetacatUtilException ue) {
        logMetacat.error("MetacatHandler.handleReindexAllAction - "
                             + "Could not determine if user is administrator: "
                             + ue.getMessage(), ue);
        // Tell the client as well, rather than sending back an empty document.
        results.setLength(0);
        results.append("<error>");
        results.append("Could not determine if the user is an administrator.");
        results.append("</error>");
    } finally {
        if (out != null) {
            out.print(results.toString());
            out.close();
        }
    }
}


/**
 * Queue every object that is not a resource map for indexing. The guids are selected from the
 * systemmetadata table, excluding each given resource map format, and are processed in
 * ascending order of date_uploaded. Failures are logged, not thrown.
 *
 * @param resourceMapFormatList the object formats that identify resource maps; null or blank
 *                              entries are ignored, and a null or empty list selects every
 *                              object
 */
private void buildAllNonResourceMapIndex(List<String> resourceMapFormatList) {
    StringBuilder sql = new StringBuilder("select guid from systemmetadata");
    boolean firstTime = true;
    if (resourceMapFormatList != null) {
        for (String format : resourceMapFormatList) {
            if (format == null || format.trim().equals("")) {
                continue;
            }
            sql.append(firstTime ? " where" : " and");
            // The format sits inside a quoted SQL literal, so embedded quotes are doubled.
            sql.append(" object_format !='").append(format.replace("'", "''")).append("'");
            firstTime = false;
        }
    }
    // Order by upload date whether or not any format was excluded.
    sql.append(" order by date_uploaded asc");

    String query = sql.toString();
    logMetacat.info("MetacatHandler.buildAllNonResourceMapIndex - the final query is " + query);
    try {
        long size = buildIndexFromQuery(query);
        logMetacat.info(
            "MetacatHandler.buildAllNonResourceMapIndex - the number of non-resource map "
                + "objects is "
                + size + " being submitted to the index queue.");
    } catch (Exception e) {
        logMetacat.error(
            "MetacatHandler.buildAllNonResourceMapIndex - can't index the objects since: "
                + e.getMessage(), e);
    }
}

/**
 * Queue every resource map object for indexing. The guids are selected from the
 * systemmetadata table where the object format matches one of the given formats, and are
 * processed in ascending order of date_uploaded. Failures are logged, not thrown.
 * <p>
 * If no usable format is given, nothing is queued. An unrestricted query would select every
 * object in the table and queue all of them a second time as if they were resource maps.
 *
 * @param resourceMapFormatList the object formats that identify resource maps; null or blank
 *                              entries are ignored
 */
private void buildAllResourceMapIndex(List<String> resourceMapFormatList) {
    StringBuilder sql = new StringBuilder("select guid from systemmetadata");
    boolean firstTime = true;
    if (resourceMapFormatList != null) {
        for (String format : resourceMapFormatList) {
            if (format == null || format.trim().equals("")) {
                continue;
            }
            sql.append(firstTime ? " where" : " or");
            // The format sits inside a quoted SQL literal, so embedded quotes are doubled.
            sql.append(" object_format ='").append(format.replace("'", "''")).append("'");
            firstTime = false;
        }
    }
    if (firstTime) {
        // No condition was added, so there is no resource map format to select on.
        logMetacat.info("MetacatHandler.buildAllResourceMapIndex - no resource map format was "
                            + "given, so no resource map object is submitted to the index "
                            + "queue.");
        return;
    }
    sql.append(" order by date_uploaded asc");

    String query = sql.toString();
    logMetacat.info("MetacatHandler.buildAllResourceMapIndex - the final query is " + query);
    try {
        long size = buildIndexFromQuery(query);
        logMetacat.info(
            "MetacatHandler.buildAllResourceMapIndex - the number of resource map objects is "
                + size + " being submitted to the index queue.");
    } catch (Exception e) {
        logMetacat.error(
            "MetacatHandler.buildAllResourceMapIndex - can't index the objects since: "
                + e.getMessage(), e);
    }
}

/**
 * Queue for indexing every object whose guid is returned by the given query. The guid is read
 * from the first column of each row. Rows without system metadata are skipped, and a failure
 * on one row is logged and does not stop the remaining rows.
 * <p>
 * Each object is submitted with low priority, as a full (not system-metadata-only) change and
 * without following revisions.
 *
 * @param sql the select statement whose first column is the guid
 * @return the number of objects that were submitted to the index queue
 * @throws SQLException if the connection cannot be obtained, the query cannot be run or the
 *                      result set cannot be advanced
 */
private long buildIndexFromQuery(String sql) throws SQLException {
    DBConnection dbConn = null;
    long queued = 0;
    int serialNumber = -1;
    try {
        // Get a database connection from the pool
        dbConn = DBConnectionPool.getDBConnection("MetacatHandler.buildIndexFromQuery");
        serialNumber = dbConn.getCheckOutSerialNumber();
        // try-with-resources closes the statement and result set on failure too, so neither
        // leaks when the query or the iteration throws.
        try (PreparedStatement stmt = dbConn.prepareStatement(sql);
             ResultSet rs = stmt.executeQuery()) {
            while (rs.next()) {
                String guid = null;
                try {
                    guid = rs.getString(1);
                    Identifier identifier = new Identifier();
                    identifier.setValue(guid);
                    SystemMetadata sysMeta =
                        SystemMetadataManager.getInstance().get(identifier);
                    if (sysMeta != null) {
                        // submit for indexing
                        boolean isSysmetaChangeOnly = false;
                        boolean followRevisions = false;
                        MetacatSolrIndex.getInstance()
                            .submit(identifier, sysMeta, isSysmetaChangeOnly, followRevisions,
                                    IndexGenerator.LOW_PRIORITY);
                        queued++;
                        logMetacat.debug("MetacatHandler.buildIndexFromQuery - queued "
                                             + "SystemMetadata for indexing in the "
                                             + "buildIndexFromQuery on pid: " + guid);
                    }
                } catch (Exception ee) {
                    // One bad object must not stop the whole run; log it and carry on.
                    logMetacat.warn(
                        "MetacatHandler.buildIndexFromQuery - can't queue the object " + guid
                            + " for indexing since: " + ee.getMessage(), ee);
                }
            }
        }
    } finally {
        // Return database connection to the pool
        DBConnectionPool.returnDBConnection(dbConn, serialNumber);
    }
    return queued;
}

/**
* Handle documents passed to metacat that are encoded using the "multipart/form-data" mime
* type. This is typically used for uploading data files which may be binary and large.
Expand Down
Loading

0 comments on commit 506336b

Please sign in to comment.