diff --git a/src/main/java/org/ecoinformatics/eml/EMLParserServlet.java b/src/main/java/org/ecoinformatics/eml/EMLParserServlet.java index 43b343f3..461d9dfb 100644 --- a/src/main/java/org/ecoinformatics/eml/EMLParserServlet.java +++ b/src/main/java/org/ecoinformatics/eml/EMLParserServlet.java @@ -62,476 +62,429 @@ /** * Servlet interface for the EMLParser */ -public class EMLParserServlet extends HttpServlet -{ - - private ServletConfig servletconfig = null; - private ServletContext context = null; - private HttpServletRequest request; - private static HttpServletResponse response; - private static PrintWriter out = null; - private Hashtable params = new Hashtable(); - private static final String NAMESPACEKEYWORD = "xmlns"; - public static final String EML2_1_1NAMESPACE = "eml://ecoinformatics.org/eml-2.1.1"; - public static final String EML2_1_0NAMESPACE = "eml://ecoinformatics.org/eml-2.1.0"; - public static final String EML2_0_1NAMESPACE = "eml://ecoinformatics.org/eml-2.0.1"; - public static final String EML2_0_0NAMESPACE = "eml://ecoinformatics.org/eml-2.0.0"; - - /** - * Initialize the servlet - */ - public void init(ServletConfig servletconfig) throws ServletException - { - try - { - super.init(servletconfig); - this.servletconfig = servletconfig; - this.context = servletconfig.getServletContext(); - System.out.println("Starting EMLParserServlet"); +public class EMLParserServlet extends HttpServlet { + + private ServletConfig servletconfig = null; + private ServletContext context = null; + private HttpServletRequest request; + private static HttpServletResponse response; + private static PrintWriter out = null; + private Hashtable params = new Hashtable(); + private static final String NAMESPACEKEYWORD = "xmlns"; + public static final String EML2_1_1NAMESPACE = "eml://ecoinformatics.org/eml-2.1.1"; + public static final String EML2_1_0NAMESPACE = "eml://ecoinformatics.org/eml-2.1.0"; + public static final String EML2_0_1NAMESPACE = "eml://ecoinformatics.org/eml-2.0.1"; + public static final String EML2_0_0NAMESPACE = "eml://ecoinformatics.org/eml-2.0.0"; + + /** + * Initialize the servlet + */ + public void init(ServletConfig servletconfig) throws ServletException { + try { + super.init(servletconfig); + this.servletconfig = servletconfig; + this.context = servletconfig.getServletContext(); + System.out.println("Starting EMLParserServlet"); + } catch (ServletException ex) { + throw ex; + } } - catch (ServletException ex) - { - throw ex; + + /** + * Destroy the servlet + */ + public void destroy() { + System.out.println("Destroying EMLParserServlet"); } - } - - /** - * Destroy the servlet - */ - public void destroy() - { - System.out.println("Destroying EMLParserServlet"); - } - - /** Handle "GET" method requests from HTTP clients */ - public void doGet (HttpServletRequest request, HttpServletResponse response) + + /** Handle "GET" method requests from HTTP clients */ + public void doGet (HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - // Process the data and send back the response - handleGetOrPost(request, response); - } + // Process the data and send back the response + handleGetOrPost(request, response); + } - /** Handle "POST" method requests from HTTP clients */ - public void doPost( HttpServletRequest request, HttpServletResponse response) + /** Handle "POST" method requests from HTTP clients */ + public void doPost( HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - // Process the data and send back the response - handleGetOrPost(request, response); - } - - /** - * Control servlet response depending on the action parameter specified - */ - private void handleGetOrPost(HttpServletRequest request, - HttpServletResponse response) - throws ServletException, IOException - { - this.request = request; - this.response = response; - StringBuffer html = new StringBuffer(); - out = response.getWriter(); - String ctype = request.getContentType(); - InputStream fileToParse = null; - File tempfile = null; - - html.append(""); - html.append(""); - html.append(""); - html.append("EML Parser"); - html.append(""); - html.append(""); - html.append("

EML Parser

"); - - HttpSession sess = request.getSession(true); - String sess_id = ""; - try - { //get the cookie for the session. - sess_id = (String)sess.getId(); - } - catch(IllegalStateException ise) - { - System.out.println("error in handleGetOrPost: this shouldn't " + - "happen: the session should be valid: " + - ise.getMessage()); + // Process the data and send back the response + handleGetOrPost(request, response); } - tempfile = File.createTempFile(".emlparser", ".tmp"); - - if (ctype != null && ctype.startsWith("multipart/form-data")) - { //deal with multipart encoding of the package zip file - try - { - fileToParse = handleGetFile(request, response); - int c = fileToParse.read(); - FileOutputStream fos = new FileOutputStream(tempfile); - while(c != -1) - { - fos.write(c); - c = fileToParse.read(); + /** + * Control servlet response depending on the action parameter specified + */ + private void handleGetOrPost(HttpServletRequest request, + HttpServletResponse response) + throws ServletException, IOException { + this.request = request; + this.response = response; + StringBuffer html = new StringBuffer(); + out = response.getWriter(); + String ctype = request.getContentType(); + InputStream fileToParse = null; + File tempfile = null; + + html.append(""); + html.append(""); + html.append(""); + html.append("EML Parser"); + html.append(""); + html.append(""); + html.append("

EML Parser

"); + + HttpSession sess = request.getSession(true); + String sess_id = ""; + try { + //get the cookie for the session. + sess_id = (String)sess.getId(); + } catch(IllegalStateException ise) { + System.out.println("error in handleGetOrPost: this shouldn't " + + "happen: the session should be valid: " + + ise.getMessage()); } - fos.flush(); - fos.close(); - } - catch(Exception e) - { - out.println("

Error handling multipart data: " + - e.getMessage() + "

"); - System.out.println("Error handling multipart data: " + e.getMessage()); - e.printStackTrace(); - } - } - else - { - Enumeration paramlist = request.getParameterNames(); - while (paramlist.hasMoreElements()) - { - String name = (String)paramlist.nextElement(); - Object value = request.getParameterValues(name); - params.put(name,value); - } - } - String action = ((String[])params.get("action"))[0]; + tempfile = File.createTempFile(".emlparser", ".tmp"); - if(action.equals("parse")) - { //parse action - html.append(parse(tempfile)); - } - else if(action.equals("textparse")) - { - String doctext = ((String[])params.get("doctext"))[0]; - if(doctext == null || doctext.trim().equals("")) - { - html.append("

Error. Submitted document is null.

"); - } - else - { - StringReader sr = new StringReader(doctext); - FileWriter fw = new FileWriter(tempfile); - int c = sr.read(); - while(c != -1) - { - fw.write(c); - c = sr.read(); + if (ctype != null && ctype.startsWith("multipart/form-data")) { + //deal with multipart encoding of the package zip file + try { + fileToParse = handleGetFile(request, response); + int c = fileToParse.read(); + FileOutputStream fos = new FileOutputStream(tempfile); + while(c != -1) { + fos.write(c); + c = fileToParse.read(); + } + fos.flush(); + fos.close(); + } catch(Exception e) { + out.println("

Error handling multipart data: " + + e.getMessage() + "

"); + System.out.println("Error handling multipart data: " + e.getMessage()); + e.printStackTrace(); + } + } else { + Enumeration paramlist = request.getParameterNames(); + while (paramlist.hasMoreElements()) { + String name = (String)paramlist.nextElement(); + Object value = request.getParameterValues(name); + params.put(name,value); + } } - fw.flush(); - fw.close(); - html.append(parse(tempfile)); - } - } - else - { - html.append("

Error. Action '").append(action); - html.append("' not registered

"); - } + String action = ((String[])params.get("action"))[0]; - tempfile.delete(); - - html.append("
Back to the previous page."); - - html.append(""); - response.setContentType("text/html"); - out.println(html.toString()); - out.flush(); - } - - private String parse(File tempfile) - { - StringBuffer html = new StringBuffer(); - - try - { - if(tempfile != null) - { - EMLParser parser = new EMLParser(tempfile); - html.append("

EML specific tests: Passed.

The tests which "); - html.append("are specific to EML, including validation that IDs are "); - html.append("present and properly referenced, have passed.

"); - } - else - { - html.append("

Error: The file sent to the parser was null.

"); - } - } - catch(Exception e) - { - html.append("

EML specific tests: Failed.

The following errors were found:"); - html.append("

").append(e.getMessage()).append("

"); - } + if(action.equals("parse")) { + //parse action + html.append(parse(tempfile)); + } else if(action.equals("textparse")) { + String doctext = ((String[])params.get("doctext"))[0]; + if(doctext == null || doctext.trim().equals("")) { + html.append("

Error. Submitted document is null.

"); + } else { + StringReader sr = new StringReader(doctext); + FileWriter fw = new FileWriter(tempfile); + int c = sr.read(); + while(c != -1) { + fw.write(c); + c = sr.read(); + } + fw.flush(); + fw.close(); - try - { - // TODO: handle UTF-8 - Reader xmlReader = new FileReader(tempfile); - String namespaceInDoc = findNamespace(xmlReader); - xmlReader.close(); - System.out.println("The namespace in xml is "+namespaceInDoc); - SAXValidate validator = new SAXValidate(true); - validator.runTest(new FileReader(tempfile), "DEFAULT", namespaceInDoc); - html.append("

XML specific tests: Passed.

"); - html.append("

Document is XML-schema valid. There were no XML errors found in your document.

"); - } - catch(IOException ioe) - { - html.append("

IOException: Error reading file

"); - html.append("

").append(ioe.getMessage()).append("

"); + html.append(parse(tempfile)); + } + } else { + html.append("

Error. Action '").append(action); + html.append("' not registered

"); + } + + tempfile.delete(); + + html.append("
Back to the previous page."); + + html.append(""); + response.setContentType("text/html"); + out.println(html.toString()); + out.flush(); } - catch(ClassNotFoundException cnfe) - { - html.append("

Parser class not found

"); - html.append("

").append(cnfe.getMessage()).append("

"); + + private String parse(File tempfile) { + StringBuffer html = new StringBuffer(); + + try { + if(tempfile != null) { + EMLParser parser = new EMLParser(tempfile); + html.append("

EML specific tests: Passed.

The tests which "); + html.append("are specific to EML, including validation that IDs are "); + html.append("present and properly referenced, have passed.

"); + } else { + html.append("

Error: The file sent to the parser was null.

"); + } + } catch(Exception e) { + html.append("

EML specific tests: Failed.

The following errors were found:"); + html.append("

").append(e.getMessage()).append("

"); + } + + try { + // TODO: handle UTF-8 + Reader xmlReader = new FileReader(tempfile); + String namespaceInDoc = findNamespace(xmlReader); + xmlReader.close(); + System.out.println("The namespace in xml is "+namespaceInDoc); + SAXValidate validator = new SAXValidate(true); + validator.runTest(new FileReader(tempfile), "DEFAULT", namespaceInDoc); + html.append("

XML specific tests: Passed.

"); + html.append("

Document is XML-schema valid. There were no XML errors found in your document.

"); + } catch(IOException ioe) { + html.append("

IOException: Error reading file

"); + html.append("

").append(ioe.getMessage()).append("

"); + } catch(ClassNotFoundException cnfe) { + html.append("

Parser class not found

"); + html.append("

").append(cnfe.getMessage()).append("

"); + } catch(SAXException se) { + if(se.getMessage().indexOf("WARNING") != -1) { + html.append("

XML-Schema Warning

The following warnings "); + html.append("were issued about your document:

"); + html.append(se.getMessage()).append("

"); + } else { + html.append("

XML specific tests: Failed

"); + html.append("The following errors were "); + html.append("found:

").append(se.getMessage()).append("

"); + } + } + + return html.toString(); } - catch(SAXException se) - { - if(se.getMessage().indexOf("WARNING") != -1) - { - html.append("

XML-Schema Warning

The following warnings "); - html.append("were issued about your document:

"); - html.append(se.getMessage()).append("

"); - } - else - { - html.append("

XML specific tests: Failed

"); - html.append("The following errors were "); - html.append("found:

").append(se.getMessage()).append("

"); - } + + /** + * This method deals with getting the zip file from the client, unzipping + * it, then running the process on it. + */ + private InputStream handleGetFile(HttpServletRequest request, + HttpServletResponse response) + throws Exception { + Hashtable fileList = new Hashtable(); + try { + MultipartParser mp = new MultipartParser(request, 1024 * 1024 * 8); + Part part; + while ((part = mp.readNextPart()) != null) { + String name = part.getName(); + if (part.isParam()) { + // it's a parameter part + ParamPart paramPart = (ParamPart) part; + String value = paramPart.getStringValue(); + String[] s = {value}; + params.put(name, s); + } else if (part.isFile()) { + // it's a file part + FilePart filePart = (FilePart) part; + fileList.put(name, filePart); + // Stop once the first file part is found, otherwise going onto the + // next part prevents access to the file contents. So...for upload + // to work, the datafile must be the last part + break; + } + } + } catch (Exception ioe) { + throw ioe; + } + + //now that we have the file, do some checking and get the files we need + //out of the zip file. + FilePart fp = (FilePart)fileList.get("filename"); + return fp.getInputStream(); } - return html.toString(); - } - - /** - * This method deals with getting the zip file from the client, unzipping - * it, then running the process on it. - */ - private InputStream handleGetFile(HttpServletRequest request, - HttpServletResponse response) - throws Exception - { - Hashtable fileList = new Hashtable(); - try - { - MultipartParser mp = new MultipartParser(request, 1024 * 1024 * 8); - Part part; - while ((part = mp.readNextPart()) != null) - { - String name = part.getName(); - if (part.isParam()) - { // it's a parameter part - ParamPart paramPart = (ParamPart) part; - String value = paramPart.getStringValue(); - String[] s = {value}; - params.put(name, s); + /** + * Gets namespace from the xml source + */ + public static String findNamespace(Reader xml) throws IOException { + + String namespace = null; + + String eml2_0_0NameSpace = EML2_0_0NAMESPACE; + String eml2_0_1NameSpace = EML2_0_1NAMESPACE; + String eml2_1_0NameSpace = EML2_1_0NAMESPACE; + String eml2_1_1NameSpace = EML2_1_1NAMESPACE; + + if (xml == null) { + //System.out.println("Validation for schema is "+ namespace); + return namespace; } - else if (part.isFile()) - { // it's a file part - FilePart filePart = (FilePart) part; - fileList.put(name, filePart); - // Stop once the first file part is found, otherwise going onto the - // next part prevents access to the file contents. So...for upload - // to work, the datafile must be the last part - break; + String targetLine = getSchemaLine(xml); + + if (targetLine != null) { + + // find if the root element has prefix + String prefix = getPrefix(targetLine); + //System.out.println("prefix is:" + prefix); + int startIndex = 0; + + + if(prefix != null) { + // if prefix found then look for xmlns:prefix + // element to find the ns + String namespaceWithPrefix = NAMESPACEKEYWORD + + ":" + prefix; + startIndex = targetLine.indexOf(namespaceWithPrefix); + //System.out.println("namespaceWithPrefix is:" + namespaceWithPrefix+":"); + //System.out.println("startIndex is:" + startIndex); + + } else { + // if prefix not found then look for xmlns + // attribute to find the ns + startIndex = targetLine.indexOf(NAMESPACEKEYWORD); + //System.out.println("startIndex is:" + startIndex); + } + + int start = 1; + int end = 1; + String namespaceString = null; + int count = 0; + if (startIndex != -1) { + for (int i = startIndex; i < targetLine.length(); i++) { + if (targetLine.charAt(i) == '"') { + count++; + } + if (targetLine.charAt(i) == '"' && count == 1) { + start = i; + } + if (targetLine.charAt(i) == '"' && count == 2) { + end = i; + break; + } + } + } + // else: xmlns not found. namespace = null will be returned + + //System.out.println("targetLine is " + targetLine); + //System.out.println("start is " + end); + //System.out.println("end is " + end); + + if(start < end) { + namespaceString = targetLine.substring(start + 1, end); + //System.out.println("namespaceString is " + namespaceString); + } + //System.out.println("namespace in xml is: "+ namespaceString); + if(namespaceString != null) { + if (namespaceString.indexOf(eml2_0_0NameSpace) != -1) { + namespace = eml2_0_0NameSpace; + } else if (namespaceString.indexOf(eml2_0_1NameSpace) != -1) { + namespace = eml2_0_1NameSpace; + } else if (namespaceString.indexOf(eml2_1_0NameSpace) != -1) { + namespace = eml2_1_0NameSpace; + } else if (namespaceString.indexOf(eml2_1_1NameSpace) != -1) { + namespace = eml2_1_1NameSpace; + } else { + namespace = namespaceString; + } + } } - } + + //System.out.println("Validation for eml is " + namespace); + + return namespace; + } - catch (Exception ioe) - { - throw ioe; + + /* + * Gets the string which contains schema declaration info + */ + private static String getSchemaLine(Reader xml) throws IOException { + + // find the line + String secondLine = null; + int count = 0; + int endIndex = 0; + int startIndex = 0; + final int TARGETNUM = 1; + StringBuffer buffer = new StringBuffer(); + boolean comment = false; + boolean processingInstruction = false; + char thirdPreviousCharacter = '?'; + char secondPreviousCharacter = '?'; + char previousCharacter = '?'; + char currentCharacter = '?'; + int tmp = xml.read(); + while (tmp != -1) { + currentCharacter = (char)tmp; + //in a comment + if (currentCharacter == '-' && previousCharacter == '-' + && secondPreviousCharacter == '!' + && thirdPreviousCharacter == '<') { + comment = true; + } + //out of comment + if (comment && currentCharacter == '>' && previousCharacter == '-' + && secondPreviousCharacter == '-') { + comment = false; + } + + //in a processingInstruction + if (currentCharacter == '?' && previousCharacter == '<') { + processingInstruction = true; + } + + //out of processingInstruction + if (processingInstruction && currentCharacter == '>' + && previousCharacter == '?') { + processingInstruction = false; + } + + //this is not comment or a processingInstruction + if (currentCharacter != '!' && previousCharacter == '<' + && !comment && !processingInstruction) { + count++; + } + + // get target line + if (count == TARGETNUM && currentCharacter != '>') { + buffer.append(currentCharacter); + } + if (count == TARGETNUM && currentCharacter == '>') { + break; + } + thirdPreviousCharacter = secondPreviousCharacter; + secondPreviousCharacter = previousCharacter; + previousCharacter = currentCharacter; + tmp = xml.read(); + } + secondLine = buffer.toString(); + //System.out.println("the second line string is: " + secondLine); + + //xml.reset(); + return secondLine; } - //now that we have the file, do some checking and get the files we need - //out of the zip file. - FilePart fp = (FilePart)fileList.get("filename"); - return fp.getInputStream(); - } - - /** - * Gets namespace from the xml source - */ - public static String findNamespace(Reader xml) throws IOException { - - String namespace = null; - - String eml2_0_0NameSpace = EML2_0_0NAMESPACE; - String eml2_0_1NameSpace = EML2_0_1NAMESPACE; - String eml2_1_0NameSpace = EML2_1_0NAMESPACE; - String eml2_1_1NameSpace = EML2_1_1NAMESPACE; - - if (xml == null) { - //System.out.println("Validation for schema is "+ namespace); - return namespace; - } - String targetLine = getSchemaLine(xml); - - if (targetLine != null) { - - // find if the root element has prefix - String prefix = getPrefix(targetLine); - //System.out.println("prefix is:" + prefix); - int startIndex = 0; - - - if(prefix != null) { - // if prefix found then look for xmlns:prefix - // element to find the ns - String namespaceWithPrefix = NAMESPACEKEYWORD - + ":" + prefix; - startIndex = targetLine.indexOf(namespaceWithPrefix); - //System.out.println("namespaceWithPrefix is:" + namespaceWithPrefix+":"); - //System.out.println("startIndex is:" + startIndex); - - } else { - // if prefix not found then look for xmlns - // attribute to find the ns - startIndex = targetLine.indexOf(NAMESPACEKEYWORD); - //System.out.println("startIndex is:" + startIndex); - } - - int start = 1; - int end = 1; - String namespaceString = null; - int count = 0; - if (startIndex != -1) { - for (int i = startIndex; i < targetLine.length(); i++) { - if (targetLine.charAt(i) == '"') { - count++; - } - if (targetLine.charAt(i) == '"' && count == 1) { - start = i; - } - if (targetLine.charAt(i) == '"' && count == 2) { - end = i; - break; - } - } - } - // else: xmlns not found. namespace = null will be returned - - //System.out.println("targetLine is " + targetLine); - //System.out.println("start is " + end); - //System.out.println("end is " + end); - - if(start < end){ - namespaceString = targetLine.substring(start + 1, end); - //System.out.println("namespaceString is " + namespaceString); - } - //System.out.println("namespace in xml is: "+ namespaceString); - if(namespaceString != null){ - if (namespaceString.indexOf(eml2_0_0NameSpace) != -1) { - namespace = eml2_0_0NameSpace; - } else if (namespaceString.indexOf(eml2_0_1NameSpace) != -1) { - namespace = eml2_0_1NameSpace; - } else if (namespaceString.indexOf(eml2_1_0NameSpace) != -1) { - namespace = eml2_1_0NameSpace; - } else if (namespaceString.indexOf(eml2_1_1NameSpace) != -1) { - namespace = eml2_1_1NameSpace; - } else { - namespace = namespaceString; - } - } - } - - //System.out.println("Validation for eml is " + namespace); - - return namespace; - - } - - /* - * Gets the string which contains schema declaration info - */ - private static String getSchemaLine(Reader xml) throws IOException { - - // find the line - String secondLine = null; - int count = 0; - int endIndex = 0; - int startIndex = 0; - final int TARGETNUM = 1; - StringBuffer buffer = new StringBuffer(); - boolean comment = false; - boolean processingInstruction = false; - char thirdPreviousCharacter = '?'; - char secondPreviousCharacter = '?'; - char previousCharacter = '?'; - char currentCharacter = '?'; - int tmp = xml.read(); - while (tmp != -1) { - currentCharacter = (char)tmp; - //in a comment - if (currentCharacter == '-' && previousCharacter == '-' - && secondPreviousCharacter == '!' - && thirdPreviousCharacter == '<') { - comment = true; - } - //out of comment - if (comment && currentCharacter == '>' && previousCharacter == '-' - && secondPreviousCharacter == '-') { - comment = false; - } - - //in a processingInstruction - if (currentCharacter == '?' && previousCharacter == '<') { - processingInstruction = true; - } - - //out of processingInstruction - if (processingInstruction && currentCharacter == '>' - && previousCharacter == '?') { - processingInstruction = false; - } - - //this is not comment or a processingInstruction - if (currentCharacter != '!' && previousCharacter == '<' - && !comment && !processingInstruction) { - count++; - } - - // get target line - if (count == TARGETNUM && currentCharacter != '>') { - buffer.append(currentCharacter); - } - if (count == TARGETNUM && currentCharacter == '>') { - break; - } - thirdPreviousCharacter = secondPreviousCharacter; - secondPreviousCharacter = previousCharacter; - previousCharacter = currentCharacter; - tmp = xml.read(); - } - secondLine = buffer.toString(); - //System.out.println("the second line string is: " + secondLine); - - //xml.reset(); - return secondLine; - } - - /* - * Gets the prefix of this eml document. E.g eml for eml:eml - */ - private static String getPrefix(String schemaLine) { - - String prefix = null; - - if(schemaLine.indexOf(" ") > 0){ - String rootElement = ""; - try { - rootElement = schemaLine.substring(0, schemaLine.indexOf(" ")); - } catch (StringIndexOutOfBoundsException sioobe) { - rootElement = schemaLine; - } - - //System.out.println("rootElement:" + rootElement); - - if(rootElement.indexOf(":") > 0){ - prefix = rootElement.substring(0, rootElement.indexOf(":")); - } - - if(prefix != null){ - return prefix.trim(); - } - } - return null; - } + /* + * Gets the prefix of this eml document. E.g eml for eml:eml + */ + private static String getPrefix(String schemaLine) { + + String prefix = null; + + if(schemaLine.indexOf(" ") > 0) { + String rootElement = ""; + try { + rootElement = schemaLine.substring(0, schemaLine.indexOf(" ")); + } catch (StringIndexOutOfBoundsException sioobe) { + rootElement = schemaLine; + } + + //System.out.println("rootElement:" + rootElement); + + if(rootElement.indexOf(":") > 0) { + prefix = rootElement.substring(0, rootElement.indexOf(":")); + } + + if(prefix != null) { + return prefix.trim(); + } + } + return null; + } }