-
-
Notifications
You must be signed in to change notification settings - Fork 15
/
EMLValidator.java
257 lines (235 loc) · 9.33 KB
/
EMLValidator.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
package org.ecoinformatics.eml;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.xpath.XPathAPI;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
 * Validate an EML document by executing additional validation checks that are
 * not already encoded in XML Schema. These are described in the EML
 * specification.
 *
 * Typical usage is to instantiate EMLValidator against an EML filename, and
 * then execute the validate() method. Problems found are reported on
 * System.err.
 */
public class EMLValidator {

    // The local document being validated, created during construction. It
    // stays null when the file could not be read or parsed.
    private Document doc = null;

    /**
     * Construct an EMLValidator for use on a given file path.
     *
     * If the file cannot be opened or parsed, the problem is reported on
     * System.err and the validator is left without a document; validate()
     * then returns false.
     *
     * @param filename the relative or absolute path to a file to be validated
     */
    public EMLValidator(String filename) {
        try {
            FileInputStream f = new FileInputStream(new File(filename));
            try {
                doc = parseDocument(new InputSource(f));
            } finally {
                // Release the file handle whether or not parsing succeeded.
                f.close();
            }
        } catch (Exception e) {
            System.err.println("Unable to load " + filename + ": " + e.getMessage());
        }
    }

    /**
     * Simple method for command-line validation. The first argument is the
     * EML filename. The result is printed on System.err.
     *
     * @param args command-line arguments; args[0] is the EML file to validate
     */
    public static void main(String[] args) {
        if (args == null || args.length < 1) {
            System.err.println("Usage: EMLValidator <eml-file>");
            return;
        }
        EMLValidator validator = new EMLValidator(args[0]);
        boolean isValid = validator.validate();
        System.err.println("isValid: " + isValid);
    }

    /**
     * Validate the EML document that was provided when the validator was
     * instantiated. This runs all of the specialized checks that go beyond
     * XML Schema validation. Every check is executed, so all detected
     * problems are reported on System.err, not only the first one.
     *
     * @return true if the document is valid, false otherwise (including when
     *         the document could not be loaded at construction time)
     * @throws IllegalStateException if one of the validation XPath
     *         expressions cannot be evaluated
     */
    public boolean validate() {
        if (doc == null) {
            // Construction failed; there is nothing that could be valid.
            System.err.println("Invalid: document could not be read or parsed.");
            return false;
        }
        boolean isValid = true;

        // All EML documents MUST have the 'eml' module as the root
        if (getXPathNodeList("/*[local-name() != 'eml']").getLength() > 0) {
            System.err.println("Invalid: root element is not eml.");
            isValid = false;
        }

        // All `id` attributes and `packageId` within the document MUST be unique
        List<String> ids = getXPathValues("//*[@id]/@id");
        ids.addAll(getXPathValues("//*/@packageId"));
        Set<String> knownIds = new HashSet<String>(ids);
        if (ids.size() != knownIds.size()) {
            System.err.println("Invalid: ID attributes must be unique. Duplicates exist.");
            isValid = false;
        }

        // If an element references another using a child `references` element
        // or references attribute, another element with that value in its `id`
        // attribute MUST exist in the document
        // If an `additionalMetadata` element references another using a child
        // `describes` element, another element with that value in its `id`
        // attribute MUST exist in the document
        List<String> refs = getXPathValues(
                "//annotation[@references]/@references|//references|//describes");
        for (String ref : refs) {
            if (!knownIds.contains(ref)) {
                System.err.println("Invalid: Reference missing from IDs: " + ref);
                isValid = false;
            }
        }

        // Elements which contain an `annotation` child element MUST contain an
        // `id` attribute, unless the containing `annotation` element contains a
        // `references` attribute. Elements whose parent carries a `describes`
        // child are exempt. debugNode() can be used on the matches when
        // investigating a failure.
        int length = getXPathNodeList(
                "//*[annotation and not(@id) and not(annotation[@references]) and not(parent::*/describes)]")
                .getLength();
        if (length > 0) {
            System.err.println("Invalid: annotations lack id or references: " + length);
            isValid = false;
        }

        // If an element references another using a child `references` element,
        // it MUST not have an `id` attribute itself
        length = getXPathNodeList("//*[references and @id]").getLength();
        if (length > 0) {
            System.err.println("Invalid: elements use both @id and references: " + length);
            isValid = false;
        }

        // TODO: When `references` is used, the `system` attribute MUST have the
        // same value in both the target and source elements, or it must be
        // absent in both. For now, we have decided to not enforce this
        // constraint, as nobody seems to use it.
        return isValid;
    }

    /**
     * Build a parsed version of the XML DOM using a default XML parser. The
     * parser is not namespace aware, and resolution of external entities and
     * external DTDs is switched off so that a document cannot make the parser
     * read other files or URLs.
     *
     * @param in the InputSource for the file to be parsed
     * @return an XML DOM Document representing the file
     * @throws Exception if the parser cannot be created or the input cannot
     *         be parsed
     */
    private Document parseDocument(InputSource in) throws Exception {
        DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
        dfactory.setNamespaceAware(false);
        dfactory.setXIncludeAware(false);
        dfactory.setExpandEntityReferences(false);
        disableFeature(dfactory, "http://xml.org/sax/features/external-general-entities");
        disableFeature(dfactory, "http://xml.org/sax/features/external-parameter-entities");
        disableFeature(dfactory, "http://apache.org/xml/features/nonvalidating/load-external-dtd");
        return dfactory.newDocumentBuilder().parse(in);
    }

    /**
     * Switch off a parser feature, tolerating parsers that do not know it.
     *
     * @param factory the factory to configure
     * @param feature the URI of the feature to disable
     */
    private static void disableFeature(DocumentBuilderFactory factory, String feature) {
        try {
            factory.setFeature(feature, false);
        } catch (ParserConfigurationException ignored) {
            // The feature URIs are parser specific; an implementation that
            // does not recognize one is left with its own defaults rather
            // than making every document unparseable.
        }
    }

    /**
     * Extract a list of text values from the document using an XPath
     * expression. Each selected node contributes its text content, so an
     * empty element or attribute yields an empty string.
     *
     * @param xpath an XPath expression selecting elements or attributes
     * @return a new, modifiable list with one value per selected node, in the
     *         order the nodes were returned
     * @throws IllegalStateException if the expression cannot be evaluated
     */
    private List<String> getXPathValues(String xpath) {
        NodeList matches = getXPathNodeList(xpath);
        List<String> values = new ArrayList<String>(matches.getLength());
        for (int i = 0; i < matches.getLength(); i++) {
            values.add(matches.item(i).getTextContent());
        }
        return values;
    }

    /**
     * Extract a NodeList using an XPath on the provided Node n.
     *
     * @param xpath an XPath expression to be executed
     * @param n the XML DOM Node on which to execute the XPath
     * @return the nodes selected by the expression
     * @throws IllegalStateException if the expression cannot be evaluated;
     *         failing loudly keeps a check that could not run from being
     *         mistaken for a check that passed
     */
    private NodeList getXPathNodeList(String xpath, Node n) {
        try {
            // XPath objects are not thread-safe, so a fresh one is used per call.
            XPath evaluator = XPathFactory.newInstance().newXPath();
            return (NodeList) evaluator.evaluate(xpath, n, XPathConstants.NODESET);
        } catch (XPathExpressionException e) {
            throw new IllegalStateException("Unable to evaluate XPath: " + xpath, e);
        }
    }

    /**
     * Extract a NodeList using an XPath on the default DOM document for the
     * instantiated EMLValidator.
     *
     * @param xpath an XPath expression to be executed
     * @return the nodes selected by the expression
     * @throws IllegalStateException if the expression cannot be evaluated
     */
    private NodeList getXPathNodeList(String xpath) {
        return getXPathNodeList(xpath, doc);
    }

    /**
     * Print out a Node, and recursively its children, on System.out for
     * debugging purposes. Not called during normal validation.
     *
     * @param node the node to print; null is ignored
     * @param spacer text printed before the opening and closing tag of an
     *        element node; children are always printed with an empty spacer
     */
    private void debugNode(Node node, String spacer) {
        if (node == null) {
            return;
        }
        switch (node.getNodeType()) {
            case Node.ELEMENT_NODE: {
                String name = node.getNodeName();
                System.out.print(spacer + "<" + name);
                NamedNodeMap attributes = node.getAttributes();
                for (int i = 0; i < attributes.getLength(); i++) {
                    Node attribute = attributes.item(i);
                    System.out.print(" " + attribute.getNodeName() + "= " + attribute.getNodeValue());
                }
                System.out.print(">");
                debugChildren(node);
                System.out.println(spacer + "</" + name + ">");
                break;
            }
            case Node.TEXT_NODE:
                System.out.print(node.getNodeValue());
                break;
            case Node.CDATA_SECTION_NODE:
                // The section content is printed as-is, without CDATA markers.
                System.out.print("" + node.getNodeValue() + "");
                break;
            case Node.ENTITY_REFERENCE_NODE:
                System.out.print("&" + node.getNodeName() + ";");
                break;
            case Node.ENTITY_NODE:
                System.out.print("<ENTITY: " + node.getNodeName() + "> </" + node.getNodeName() + "/>");
                break;
            case Node.DOCUMENT_NODE:
                debugChildren(node);
                break;
            case Node.DOCUMENT_TYPE_NODE: {
                DocumentType docType = (DocumentType) node;
                System.out.print("<!DOCTYPE " + docType.getName());
                if (docType.getPublicId() != null) {
                    System.out.print(" PUBLIC " + docType.getPublicId() + " ");
                } else {
                    System.out.print(" SYSTEM ");
                }
                System.out.println(" " + docType.getSystemId() + ">");
                break;
            }
            default:
                break;
        }
    }

    /**
     * Print every child of the given node with debugNode, using an empty
     * spacer.
     *
     * @param parent the node whose children are printed
     */
    private void debugChildren(Node parent) {
        NodeList children = parent.getChildNodes();
        if (children == null) {
            return;
        }
        for (int i = 0; i < children.getLength(); i++) {
            debugNode(children.item(i), "");
        }
    }
}