diff --git a/src/main/java/org/ecoinformatics/eml/EMLParserServlet.java b/src/main/java/org/ecoinformatics/eml/EMLParserServlet.java index 43b343f3..461d9dfb 100644 --- a/src/main/java/org/ecoinformatics/eml/EMLParserServlet.java +++ b/src/main/java/org/ecoinformatics/eml/EMLParserServlet.java @@ -62,476 +62,429 @@ /** * Servlet interface for the EMLParser */ -public class EMLParserServlet extends HttpServlet -{ - - private ServletConfig servletconfig = null; - private ServletContext context = null; - private HttpServletRequest request; - private static HttpServletResponse response; - private static PrintWriter out = null; - private Hashtable params = new Hashtable(); - private static final String NAMESPACEKEYWORD = "xmlns"; - public static final String EML2_1_1NAMESPACE = "eml://ecoinformatics.org/eml-2.1.1"; - public static final String EML2_1_0NAMESPACE = "eml://ecoinformatics.org/eml-2.1.0"; - public static final String EML2_0_1NAMESPACE = "eml://ecoinformatics.org/eml-2.0.1"; - public static final String EML2_0_0NAMESPACE = "eml://ecoinformatics.org/eml-2.0.0"; - - /** - * Initialize the servlet - */ - public void init(ServletConfig servletconfig) throws ServletException - { - try - { - super.init(servletconfig); - this.servletconfig = servletconfig; - this.context = servletconfig.getServletContext(); - System.out.println("Starting EMLParserServlet"); +public class EMLParserServlet extends HttpServlet { + + private ServletConfig servletconfig = null; + private ServletContext context = null; + private HttpServletRequest request; + private static HttpServletResponse response; + private static PrintWriter out = null; + private Hashtable params = new Hashtable(); + private static final String NAMESPACEKEYWORD = "xmlns"; + public static final String EML2_1_1NAMESPACE = "eml://ecoinformatics.org/eml-2.1.1"; + public static final String EML2_1_0NAMESPACE = "eml://ecoinformatics.org/eml-2.1.0"; + public static final String EML2_0_1NAMESPACE = "eml://ecoinformatics.org/eml-2.0.1"; + public static final String EML2_0_0NAMESPACE = "eml://ecoinformatics.org/eml-2.0.0"; + + /** + * Initialize the servlet + */ + public void init(ServletConfig servletconfig) throws ServletException { + try { + super.init(servletconfig); + this.servletconfig = servletconfig; + this.context = servletconfig.getServletContext(); + System.out.println("Starting EMLParserServlet"); + } catch (ServletException ex) { + throw ex; + } } - catch (ServletException ex) - { - throw ex; + + /** + * Destroy the servlet + */ + public void destroy() { + System.out.println("Destroying EMLParserServlet"); } - } - - /** - * Destroy the servlet - */ - public void destroy() - { - System.out.println("Destroying EMLParserServlet"); - } - - /** Handle "GET" method requests from HTTP clients */ - public void doGet (HttpServletRequest request, HttpServletResponse response) + + /** Handle "GET" method requests from HTTP clients */ + public void doGet (HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - // Process the data and send back the response - handleGetOrPost(request, response); - } + // Process the data and send back the response + handleGetOrPost(request, response); + } - /** Handle "POST" method requests from HTTP clients */ - public void doPost( HttpServletRequest request, HttpServletResponse response) + /** Handle "POST" method requests from HTTP clients */ + public void doPost( HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - // Process the data and send back the response - handleGetOrPost(request, response); - } - - /** - * Control servlet response depending on the action parameter specified - */ - private void handleGetOrPost(HttpServletRequest request, - HttpServletResponse response) - throws ServletException, IOException - { - this.request = request; - this.response = response; - StringBuffer html = new StringBuffer(); - out = response.getWriter(); - String ctype = request.getContentType(); - InputStream fileToParse = null; - File tempfile = null; - - html.append(""); - html.append(""); - html.append("
"); - html.append("The tests which "); - html.append("are specific to EML, including validation that IDs are "); - html.append("present and properly referenced, have passed.
"); - } - else - { - html.append("The following errors were found:"); - html.append("
").append(e.getMessage()).append("
"); - } + if(action.equals("parse")) { + //parse action + html.append(parse(tempfile)); + } else if(action.equals("textparse")) { + String doctext = ((String[])params.get("doctext"))[0]; + if(doctext == null || doctext.trim().equals("")) { + html.append("Document is XML-schema valid. There were no XML errors found in your document.
"); - } - catch(IOException ioe) - { - html.append("").append(ioe.getMessage()).append("
"); + html.append(parse(tempfile)); + } + } else { + html.append("").append(cnfe.getMessage()).append("
"); + + private String parse(File tempfile) { + StringBuffer html = new StringBuffer(); + + try { + if(tempfile != null) { + EMLParser parser = new EMLParser(tempfile); + html.append("The tests which "); + html.append("are specific to EML, including validation that IDs are "); + html.append("present and properly referenced, have passed.
"); + } else { + html.append("The following errors were found:"); + html.append("
").append(e.getMessage()).append("
"); + } + + try { + // TODO: handle UTF-8 + Reader xmlReader = new FileReader(tempfile); + String namespaceInDoc = findNamespace(xmlReader); + xmlReader.close(); + System.out.println("The namespace in xml is "+namespaceInDoc); + SAXValidate validator = new SAXValidate(true); + validator.runTest(new FileReader(tempfile), "DEFAULT", namespaceInDoc); + html.append("Document is XML-schema valid. There were no XML errors found in your document.
"); + } catch(IOException ioe) { + html.append("").append(ioe.getMessage()).append("
"); + } catch(ClassNotFoundException cnfe) { + html.append("").append(cnfe.getMessage()).append("
"); + } catch(SAXException se) { + if(se.getMessage().indexOf("WARNING") != -1) { + html.append("The following warnings "); + html.append("were issued about your document:
"); + html.append(se.getMessage()).append("
"); + } else { + html.append(""); + html.append("The following errors were "); + html.append("found:
").append(se.getMessage()).append("
"); + } + } + + return html.toString(); } - catch(SAXException se) - { - if(se.getMessage().indexOf("WARNING") != -1) - { - html.append("The following warnings "); - html.append("were issued about your document:
"); - html.append(se.getMessage()).append("
"); - } - else - { - html.append(""); - html.append("The following errors were "); - html.append("found:
").append(se.getMessage()).append("
"); - } + + /** + * This method deals with getting the zip file from the client, unzipping + * it, then running the process on it. + */ + private InputStream handleGetFile(HttpServletRequest request, + HttpServletResponse response) + throws Exception { + Hashtable fileList = new Hashtable(); + try { + MultipartParser mp = new MultipartParser(request, 1024 * 1024 * 8); + Part part; + while ((part = mp.readNextPart()) != null) { + String name = part.getName(); + if (part.isParam()) { + // it's a parameter part + ParamPart paramPart = (ParamPart) part; + String value = paramPart.getStringValue(); + String[] s = {value}; + params.put(name, s); + } else if (part.isFile()) { + // it's a file part + FilePart filePart = (FilePart) part; + fileList.put(name, filePart); + // Stop once the first file part is found, otherwise going onto the + // next part prevents access to the file contents. So...for upload + // to work, the datafile must be the last part + break; + } + } + } catch (Exception ioe) { + throw ioe; + } + + //now that we have the file, do some checking and get the files we need + //out of the zip file. + FilePart fp = (FilePart)fileList.get("filename"); + return fp.getInputStream(); } - return html.toString(); - } - - /** - * This method deals with getting the zip file from the client, unzipping - * it, then running the process on it. - */ - private InputStream handleGetFile(HttpServletRequest request, - HttpServletResponse response) - throws Exception - { - Hashtable fileList = new Hashtable(); - try - { - MultipartParser mp = new MultipartParser(request, 1024 * 1024 * 8); - Part part; - while ((part = mp.readNextPart()) != null) - { - String name = part.getName(); - if (part.isParam()) - { // it's a parameter part - ParamPart paramPart = (ParamPart) part; - String value = paramPart.getStringValue(); - String[] s = {value}; - params.put(name, s); + /** + * Gets namespace from the xml source + */ + public static String findNamespace(Reader xml) throws IOException { + + String namespace = null; + + String eml2_0_0NameSpace = EML2_0_0NAMESPACE; + String eml2_0_1NameSpace = EML2_0_1NAMESPACE; + String eml2_1_0NameSpace = EML2_1_0NAMESPACE; + String eml2_1_1NameSpace = EML2_1_1NAMESPACE; + + if (xml == null) { + //System.out.println("Validation for schema is "+ namespace); + return namespace; } - else if (part.isFile()) - { // it's a file part - FilePart filePart = (FilePart) part; - fileList.put(name, filePart); - // Stop once the first file part is found, otherwise going onto the - // next part prevents access to the file contents. So...for upload - // to work, the datafile must be the last part - break; + String targetLine = getSchemaLine(xml); + + if (targetLine != null) { + + // find if the root element has prefix + String prefix = getPrefix(targetLine); + //System.out.println("prefix is:" + prefix); + int startIndex = 0; + + + if(prefix != null) { + // if prefix found then look for xmlns:prefix + // element to find the ns + String namespaceWithPrefix = NAMESPACEKEYWORD + + ":" + prefix; + startIndex = targetLine.indexOf(namespaceWithPrefix); + //System.out.println("namespaceWithPrefix is:" + namespaceWithPrefix+":"); + //System.out.println("startIndex is:" + startIndex); + + } else { + // if prefix not found then look for xmlns + // attribute to find the ns + startIndex = targetLine.indexOf(NAMESPACEKEYWORD); + //System.out.println("startIndex is:" + startIndex); + } + + int start = 1; + int end = 1; + String namespaceString = null; + int count = 0; + if (startIndex != -1) { + for (int i = startIndex; i < targetLine.length(); i++) { + if (targetLine.charAt(i) == '"') { + count++; + } + if (targetLine.charAt(i) == '"' && count == 1) { + start = i; + } + if (targetLine.charAt(i) == '"' && count == 2) { + end = i; + break; + } + } + } + // else: xmlns not found. namespace = null will be returned + + //System.out.println("targetLine is " + targetLine); + //System.out.println("start is " + end); + //System.out.println("end is " + end); + + if(start < end) { + namespaceString = targetLine.substring(start + 1, end); + //System.out.println("namespaceString is " + namespaceString); + } + //System.out.println("namespace in xml is: "+ namespaceString); + if(namespaceString != null) { + if (namespaceString.indexOf(eml2_0_0NameSpace) != -1) { + namespace = eml2_0_0NameSpace; + } else if (namespaceString.indexOf(eml2_0_1NameSpace) != -1) { + namespace = eml2_0_1NameSpace; + } else if (namespaceString.indexOf(eml2_1_0NameSpace) != -1) { + namespace = eml2_1_0NameSpace; + } else if (namespaceString.indexOf(eml2_1_1NameSpace) != -1) { + namespace = eml2_1_1NameSpace; + } else { + namespace = namespaceString; + } + } } - } + + //System.out.println("Validation for eml is " + namespace); + + return namespace; + } - catch (Exception ioe) - { - throw ioe; + + /* + * Gets the string which contains schema declaration info + */ + private static String getSchemaLine(Reader xml) throws IOException { + + // find the line + String secondLine = null; + int count = 0; + int endIndex = 0; + int startIndex = 0; + final int TARGETNUM = 1; + StringBuffer buffer = new StringBuffer(); + boolean comment = false; + boolean processingInstruction = false; + char thirdPreviousCharacter = '?'; + char secondPreviousCharacter = '?'; + char previousCharacter = '?'; + char currentCharacter = '?'; + int tmp = xml.read(); + while (tmp != -1) { + currentCharacter = (char)tmp; + //in a comment + if (currentCharacter == '-' && previousCharacter == '-' + && secondPreviousCharacter == '!' + && thirdPreviousCharacter == '<') { + comment = true; + } + //out of comment + if (comment && currentCharacter == '>' && previousCharacter == '-' + && secondPreviousCharacter == '-') { + comment = false; + } + + //in a processingInstruction + if (currentCharacter == '?' && previousCharacter == '<') { + processingInstruction = true; + } + + //out of processingInstruction + if (processingInstruction && currentCharacter == '>' + && previousCharacter == '?') { + processingInstruction = false; + } + + //this is not comment or a processingInstruction + if (currentCharacter != '!' && previousCharacter == '<' + && !comment && !processingInstruction) { + count++; + } + + // get target line + if (count == TARGETNUM && currentCharacter != '>') { + buffer.append(currentCharacter); + } + if (count == TARGETNUM && currentCharacter == '>') { + break; + } + thirdPreviousCharacter = secondPreviousCharacter; + secondPreviousCharacter = previousCharacter; + previousCharacter = currentCharacter; + tmp = xml.read(); + } + secondLine = buffer.toString(); + //System.out.println("the second line string is: " + secondLine); + + //xml.reset(); + return secondLine; } - //now that we have the file, do some checking and get the files we need - //out of the zip file. - FilePart fp = (FilePart)fileList.get("filename"); - return fp.getInputStream(); - } - - /** - * Gets namespace from the xml source - */ - public static String findNamespace(Reader xml) throws IOException { - - String namespace = null; - - String eml2_0_0NameSpace = EML2_0_0NAMESPACE; - String eml2_0_1NameSpace = EML2_0_1NAMESPACE; - String eml2_1_0NameSpace = EML2_1_0NAMESPACE; - String eml2_1_1NameSpace = EML2_1_1NAMESPACE; - - if (xml == null) { - //System.out.println("Validation for schema is "+ namespace); - return namespace; - } - String targetLine = getSchemaLine(xml); - - if (targetLine != null) { - - // find if the root element has prefix - String prefix = getPrefix(targetLine); - //System.out.println("prefix is:" + prefix); - int startIndex = 0; - - - if(prefix != null) { - // if prefix found then look for xmlns:prefix - // element to find the ns - String namespaceWithPrefix = NAMESPACEKEYWORD - + ":" + prefix; - startIndex = targetLine.indexOf(namespaceWithPrefix); - //System.out.println("namespaceWithPrefix is:" + namespaceWithPrefix+":"); - //System.out.println("startIndex is:" + startIndex); - - } else { - // if prefix not found then look for xmlns - // attribute to find the ns - startIndex = targetLine.indexOf(NAMESPACEKEYWORD); - //System.out.println("startIndex is:" + startIndex); - } - - int start = 1; - int end = 1; - String namespaceString = null; - int count = 0; - if (startIndex != -1) { - for (int i = startIndex; i < targetLine.length(); i++) { - if (targetLine.charAt(i) == '"') { - count++; - } - if (targetLine.charAt(i) == '"' && count == 1) { - start = i; - } - if (targetLine.charAt(i) == '"' && count == 2) { - end = i; - break; - } - } - } - // else: xmlns not found. namespace = null will be returned - - //System.out.println("targetLine is " + targetLine); - //System.out.println("start is " + end); - //System.out.println("end is " + end); - - if(start < end){ - namespaceString = targetLine.substring(start + 1, end); - //System.out.println("namespaceString is " + namespaceString); - } - //System.out.println("namespace in xml is: "+ namespaceString); - if(namespaceString != null){ - if (namespaceString.indexOf(eml2_0_0NameSpace) != -1) { - namespace = eml2_0_0NameSpace; - } else if (namespaceString.indexOf(eml2_0_1NameSpace) != -1) { - namespace = eml2_0_1NameSpace; - } else if (namespaceString.indexOf(eml2_1_0NameSpace) != -1) { - namespace = eml2_1_0NameSpace; - } else if (namespaceString.indexOf(eml2_1_1NameSpace) != -1) { - namespace = eml2_1_1NameSpace; - } else { - namespace = namespaceString; - } - } - } - - //System.out.println("Validation for eml is " + namespace); - - return namespace; - - } - - /* - * Gets the string which contains schema declaration info - */ - private static String getSchemaLine(Reader xml) throws IOException { - - // find the line - String secondLine = null; - int count = 0; - int endIndex = 0; - int startIndex = 0; - final int TARGETNUM = 1; - StringBuffer buffer = new StringBuffer(); - boolean comment = false; - boolean processingInstruction = false; - char thirdPreviousCharacter = '?'; - char secondPreviousCharacter = '?'; - char previousCharacter = '?'; - char currentCharacter = '?'; - int tmp = xml.read(); - while (tmp != -1) { - currentCharacter = (char)tmp; - //in a comment - if (currentCharacter == '-' && previousCharacter == '-' - && secondPreviousCharacter == '!' - && thirdPreviousCharacter == '<') { - comment = true; - } - //out of comment - if (comment && currentCharacter == '>' && previousCharacter == '-' - && secondPreviousCharacter == '-') { - comment = false; - } - - //in a processingInstruction - if (currentCharacter == '?' && previousCharacter == '<') { - processingInstruction = true; - } - - //out of processingInstruction - if (processingInstruction && currentCharacter == '>' - && previousCharacter == '?') { - processingInstruction = false; - } - - //this is not comment or a processingInstruction - if (currentCharacter != '!' && previousCharacter == '<' - && !comment && !processingInstruction) { - count++; - } - - // get target line - if (count == TARGETNUM && currentCharacter != '>') { - buffer.append(currentCharacter); - } - if (count == TARGETNUM && currentCharacter == '>') { - break; - } - thirdPreviousCharacter = secondPreviousCharacter; - secondPreviousCharacter = previousCharacter; - previousCharacter = currentCharacter; - tmp = xml.read(); - } - secondLine = buffer.toString(); - //System.out.println("the second line string is: " + secondLine); - - //xml.reset(); - return secondLine; - } - - /* - * Gets the prefix of this eml document. E.g eml for eml:eml - */ - private static String getPrefix(String schemaLine) { - - String prefix = null; - - if(schemaLine.indexOf(" ") > 0){ - String rootElement = ""; - try { - rootElement = schemaLine.substring(0, schemaLine.indexOf(" ")); - } catch (StringIndexOutOfBoundsException sioobe) { - rootElement = schemaLine; - } - - //System.out.println("rootElement:" + rootElement); - - if(rootElement.indexOf(":") > 0){ - prefix = rootElement.substring(0, rootElement.indexOf(":")); - } - - if(prefix != null){ - return prefix.trim(); - } - } - return null; - } + /* + * Gets the prefix of this eml document. E.g eml for eml:eml + */ + private static String getPrefix(String schemaLine) { + + String prefix = null; + + if(schemaLine.indexOf(" ") > 0) { + String rootElement = ""; + try { + rootElement = schemaLine.substring(0, schemaLine.indexOf(" ")); + } catch (StringIndexOutOfBoundsException sioobe) { + rootElement = schemaLine; + } + + //System.out.println("rootElement:" + rootElement); + + if(rootElement.indexOf(":") > 0) { + prefix = rootElement.substring(0, rootElement.indexOf(":")); + } + + if(prefix != null) { + return prefix.trim(); + } + } + return null; + } }