Reformat and document EMLValidator.

NCEAS · Jan 31, 2019 · 5036bfa · 5036bfa
1 parent a25c845
commit 5036bfa
Show file tree

Hide file tree

Showing 2 changed files with 199 additions and 143 deletions.
diff --git a/lib/eml.jar b/lib/eml.jar
diff --git a/src/main/java/org/ecoinformatics/eml/EMLValidator.java b/src/main/java/org/ecoinformatics/eml/EMLValidator.java
@@ -19,183 +19,239 @@
 
 import org.xml.sax.InputSource;
 
+/**
+ * Validate an EML document by executing additional validation checks that are
+ * not already encoded in XML Schema.  These are described in the EML
+ * specification. 
+ *
+ * Typical usage is to instantiate EMLValidator against an EML filename, and
+ * then execute the validate() method.
+ */
 public class EMLValidator {
 
-	private Document doc = null;
-
-	public EMLValidator(String filename) {
-		try {
-			FileInputStream f = new FileInputStream(new File(filename));
-			doc = parseDocument(new InputSource(f));
-		} catch (Exception e) {
-			System.err.println(e.getMessage());
-		}
-	}
-
-	public static void main(String[] args) {
-		String emlfile = args[0];
-		EMLValidator validator = new EMLValidator(emlfile);
-		boolean isValid = validator.validate();
-        System.err.println("isValid: " + (new Boolean(isValid)).toString());
-	}
+    // The local document being validated, created during construction
+    private Document doc = null;
+
+    /**
+     * Construct an EMLValidator for use on a given file path.
+     * @param filename the relative or absolute path to a file to be validated
+     */
+    public EMLValidator(String filename) {
+        try {
+            FileInputStream f = new FileInputStream(new File(filename));
+            doc = parseDocument(new InputSource(f));
+        } catch (Exception e) {
+            System.err.println(e.getMessage());
+        }
+    }
 
-	private Document parseDocument(InputSource in) throws Exception {
-		DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
-		dfactory.setNamespaceAware(false);
-		Document doc = dfactory.newDocumentBuilder().parse(in);
-		return doc;
-	}
+    /**
+     * Simple method for command-line validation.  The first argument is the
+     * EML filename.
+     */
+    public static void main(String[] args) {
+        String emlfile = args[0];
+        EMLValidator validator = new EMLValidator(emlfile);
+        boolean isValid = validator.validate();
+        System.err.println("isValid: " + (new Boolean(isValid)).toString());
+    }
 
-	public boolean validate() {
-		boolean isValid = true;
+    /**
+     * Validate the EML document that was provided when the validator was
+     * instantiated.  Thiss runs all of the specialized checks that go beyond
+     * XML Schema validation.  The method returns true if valid, false
+     * otherwise.
+     * @return true if the document iss valid, false otherwise
+     */
+    public boolean validate() {
+        boolean isValid = true;
 
-		// All EML documents MUST have the 'eml' module as the root
+        // All EML documents MUST have the 'eml' module as the root
         NodeList root_not_eml = getXPathNodeList("/*[local-name() != 'eml']");
         int length = root_not_eml.getLength();
         if (length > 0) {
-		    System.err.println("Invalid: root element is not eml.");
-			isValid = false;
+            System.err.println("Invalid: root element is not eml.");
+            isValid = false;
         }
 
-		// All `id` attributes and `packageId` within the document MUST be unique
-		Vector<String> packageId = getXPathValues("//*/@packageId");
-		Vector<String> ids = getXPathValues("//*[@id]/@id");
+        // All `id` attributes and `packageId` within the document MUST be unique
+        Vector<String> packageId = getXPathValues("//*/@packageId");
+        Vector<String> ids = getXPathValues("//*[@id]/@id");
         ids.addAll(packageId);
-		HashMap idmap = new HashMap();
-		for (String s : ids) {
-			idmap.put(s, null);
-		}
-		if (ids.size() != idmap.size()) {
-			System.err.println("Invalid: ID attributes must be unique. Duplicates exist.");
-			isValid = false;
-		}
-
-		// If an element references another using a child `references` element or references attribute, another element with that value in its `id` attribute MUST exist in the document
-		// If an `additionalMetadata` element references another using a child `describes` element, another element with that value in its `id` attribute MUST exist in the document
-		Vector<String> refs = getXPathValues("//annotation[@references]/@references|//references|/describes");
-		for (String s : refs) {
-			if (!ids.contains(s)) {
-				System.err.println("Invalid: Reference missing from IDs: " + s);
-				isValid = false;
-			}
-		}
-
-
-		// Elements which contain an `annotation` child element MUST contain an `id` attribute, unless the containing `annotation` element contains a `references` attribute
+        HashMap idmap = new HashMap();
+        for (String s : ids) {
+            idmap.put(s, null);
+        }
+        if (ids.size() != idmap.size()) {
+            System.err.println("Invalid: ID attributes must be unique. Duplicates exist.");
+            isValid = false;
+        }
+
+        // If an element references another using a child `references` element 
+        // or references attribute, another element with that value in its `id` 
+        // attribute MUST exist in the document
+        // If an `additionalMetadata` element references another using a child 
+        // `describes` element, another element with that value in its `id` 
+        // attribute MUST exist in the document
+        Vector<String> refs = getXPathValues("//annotation[@references]/@references|//references|/describes");
+        for (String s : refs) {
+            if (!ids.contains(s)) {
+                System.err.println("Invalid: Reference missing from IDs: " + s);
+                isValid = false;
+            }
+        }
+
+
+        // Elements which contain an `annotation` child element MUST contain an 
+        // `id` attribute, unless the containing `annotation` element contains a 
+        // `references` attribute
         NodeList missing_id_ref = getXPathNodeList("//*[annotation and not(@id) and not(annotation[@references]) and not(parent::*/describes)]");
         length = missing_id_ref.getLength();
         if (length > 0) {
-		    System.err.println("Invalid: annotations lack id or references: " + length);
-			isValid = false;
-		    //for (int i = 0; i < missing_id_ref.getLength(); i++) {
-			    //Node n = missing_id_ref.item(i);
-			    //debugNode(n, "");
+            System.err.println("Invalid: annotations lack id or references: " + length);
+            isValid = false;
+            //for (int i = 0; i < missing_id_ref.getLength(); i++) {
+            //Node n = missing_id_ref.item(i);
+            //debugNode(n, "");
             //}
         }
 
-		// If an element references another using a child `references` element, it MUST not have an `id` attribute itself
+        // If an element references another using a child `references` element, 
+        // it MUST not have an `id` attribute itself
         NodeList both_id_ref = getXPathNodeList("//*[references and @id]");
         length = both_id_ref.getLength();
         if (length > 0) {
-		    System.err.println("Invalid: elements use both @id and references: " + length);
-			isValid = false;
-		    //for (int i = 0; i < both_id_ref.getLength(); i++) {
-			    //Node n = both_id_ref.item(i);
-			    //debugNode(n, "");
+            System.err.println("Invalid: elements use both @id and references: " + length);
+            isValid = false;
+            //for (int i = 0; i < both_id_ref.getLength(); i++) {
+            //Node n = both_id_ref.item(i);
+            //debugNode(n, "");
             //}
         }
 
-		// TODO: When `references` is used, the `system` attribute MUST have the same value in both the target and source elements, or it must be absent in both. For now, we have decided to not enforce this constraint, as snobody seems to use it.
+        // TODO: When `references` is used, the `system` attribute MUST have the 
+        // same value in both the target and source elements, or it must be 
+        // absent in both. For now, we have decided to not enforce this 
+        // constraint, as snobody seems to use it.
 
-		return isValid;
-	}
+        return isValid;
+    }
 
-	private Vector getXPathValues(String xpath) {
-		// Use the simple XPath API to select a nodeIterator.
-		Vector<String> values = new Vector<String>();
-		try {
-			NodeList nl = XPathAPI.selectNodeList(doc, xpath);
+    /**
+     * Build a parsed version of the XML DOM using a default XML parser.
+     * @param in the InputSource for the file to be parsed
+     * @return an XML DOM Document representing the file
+     */
+    private Document parseDocument(InputSource in) throws Exception {
+        DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
+        dfactory.setNamespaceAware(false);
+        Document doc = dfactory.newDocumentBuilder().parse(in);
+        return doc;
+    }
 
-			for (int i = 0; i < nl.getLength(); i++) {
-				Node n = nl.item(i);
-				n.normalize();
-				Node t = n.getFirstChild();
-				values.add(t.getNodeValue());
-			}
+    /**
+     * Extract a vector of text values from the document using an XPath
+     * expression.
+     */
+    private Vector getXPathValues(String xpath) {
+        // Use the simple XPath API to select a nodeIterator.
+        Vector<String> values = new Vector<String>();
+        try {
+            NodeList nl = XPathAPI.selectNodeList(doc, xpath);
 
-		} catch (Exception e) {
-			System.out.println(e.getMessage());
-		}
-		return values;
-	}
+            for (int i = 0; i < nl.getLength(); i++) {
+                Node n = nl.item(i);
+                n.normalize();
+                Node t = n.getFirstChild();
+                values.add(t.getNodeValue());
+            }
+
+        } catch (Exception e) {
+            System.err.println(e.getMessage());
+        }
+        return values;
+    }
 
-	private NodeList getXPathNodeList(String xpath, Node n) {
+    /**
+     * Extract a NodeList using an XPath on the provided Node n. 
+     * @param xpath an XPath expression to be executed
+     * @param n the XML DOM Node on which to execute the XPath
+     */
+    private NodeList getXPathNodeList(String xpath, Node n) {
         NodeList nl = null;
-		try {
-			nl = XPathAPI.selectNodeList(n, xpath);
-		} catch (Exception e) {
-			System.out.println(e.getMessage());
-		}
+        try {
+            nl = XPathAPI.selectNodeList(n, xpath);
+        } catch (Exception e) {
+            System.err.println(e.getMessage());
+        }
         return nl;
-	}
+    }
 
-	private NodeList getXPathNodeList(String xpath) {
+    /**
+     * Extract a NodeList ussing an XPath n the default DOM document for the
+     * instantiated EMLValidator.
+     * @param xpath an XPath expression to be executed
+     */
+    private NodeList getXPathNodeList(String xpath) {
         return getXPathNodeList(xpath, doc);
-	}
+    }
 
+    /**
+     * Print out a Node for debugging purposes.
+     */
     private void debugNode(Node node, String spacer) {
-    if (node == null)
-      return;
-    switch (node.getNodeType()) {
-    case Node.ELEMENT_NODE:
-      String name = node.getNodeName();
-      System.out.print(spacer + "<" + name);
-      NamedNodeMap nnm = node.getAttributes();
-      for (int i = 0; i < nnm.getLength(); i++) {
-        Node current = nnm.item(i);
-        System.out.print(" " + current.getNodeName() + "= " + current.getNodeValue());
-      }
-      System.out.print(">");
-      NodeList nl = node.getChildNodes();
-      if (nl != null) {
-        for (int i = 0; i < nl.getLength(); i++) {
-          debugNode(nl.item(i), "");
-        }
-      }
-      System.out.println(spacer + "</" + name + ">");
-      break;
-    case Node.TEXT_NODE:
-      System.out.print(node.getNodeValue());
-      break;
-    case Node.CDATA_SECTION_NODE:
-      System.out.print("" + node.getNodeValue() + "");
-      break;
-    case Node.ENTITY_REFERENCE_NODE:
-      System.out.print("&" + node.getNodeName() + ";");
-      break;
-    case Node.ENTITY_NODE:
-      System.out.print("<ENTITY: " + node.getNodeName() + "> </" + node.getNodeName() + "/>");
-      break;
-    case Node.DOCUMENT_NODE:
-      NodeList nodes = node.getChildNodes();
-      if (nodes != null) {
-        for (int i = 0; i < nodes.getLength(); i++) {
-          debugNode(nodes.item(i), "");
+        if (node == null)
+            return;
+        switch (node.getNodeType()) {
+        case Node.ELEMENT_NODE:
+            String name = node.getNodeName();
+            System.out.print(spacer + "<" + name);
+            NamedNodeMap nnm = node.getAttributes();
+            for (int i = 0; i < nnm.getLength(); i++) {
+                Node current = nnm.item(i);
+                System.out.print(" " + current.getNodeName() + "= " + current.getNodeValue());
+            }
+            System.out.print(">");
+            NodeList nl = node.getChildNodes();
+            if (nl != null) {
+                for (int i = 0; i < nl.getLength(); i++) {
+                    debugNode(nl.item(i), "");
+                }
+            }
+            System.out.println(spacer + "</" + name + ">");
+            break;
+        case Node.TEXT_NODE:
+            System.out.print(node.getNodeValue());
+            break;
+        case Node.CDATA_SECTION_NODE:
+            System.out.print("" + node.getNodeValue() + "");
+            break;
+        case Node.ENTITY_REFERENCE_NODE:
+            System.out.print("&" + node.getNodeName() + ";");
+            break;
+        case Node.ENTITY_NODE:
+            System.out.print("<ENTITY: " + node.getNodeName() + "> </" + node.getNodeName() + "/>");
+            break;
+        case Node.DOCUMENT_NODE:
+            NodeList nodes = node.getChildNodes();
+            if (nodes != null) {
+                for (int i = 0; i < nodes.getLength(); i++) {
+                    debugNode(nodes.item(i), "");
+                }
+            }
+            break;
+        case Node.DOCUMENT_TYPE_NODE:
+            DocumentType docType = (DocumentType) node;
+            System.out.print("<!DOCTYPE " + docType.getName());
+            if (docType.getPublicId() != null) {
+                System.out.print(" PUBLIC " + docType.getPublicId() + " ");
+            } else {
+                System.out.print(" SYSTEM ");
+            }
+            System.out.println(" " + docType.getSystemId() + ">");
+            break;
+        default:
+            break;
         }
-      }
-      break;
-    case Node.DOCUMENT_TYPE_NODE:
-      DocumentType docType = (DocumentType) node;
-      System.out.print("<!DOCTYPE " + docType.getName());
-      if (docType.getPublicId() != null) {
-        System.out.print(" PUBLIC " + docType.getPublicId() + " ");
-      } else {
-        System.out.print(" SYSTEM ");
-      }
-      System.out.println(" " + docType.getSystemId() + ">");
-      break;
-    default:
-      break;
     }
-  }
 }