Skip to content

Commit

Permalink
DomLexicalProcessor: cleanup, XML detection regex as constant
Browse files Browse the repository at this point in the history
The regex now also uses greedy (non-reluctant) matching for leading whitespace;
the result should be the same.
  • Loading branch information
virgo47 committed Jun 9, 2020
1 parent 8e2b40d commit e16d813
Showing 1 changed file with 52 additions and 43 deletions.
Expand Up @@ -6,43 +6,46 @@
*/
package com.evolveum.midpoint.prism.impl.lex.dom;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.Validate;
import org.apache.commons.lang3.StringUtils;
import org.codehaus.staxmate.dom.DOMConverter;
import org.jetbrains.annotations.Contract;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import com.evolveum.midpoint.prism.*;
import com.evolveum.midpoint.prism.impl.ParserElementSource;
import com.evolveum.midpoint.prism.impl.lex.LexicalProcessor;
import com.evolveum.midpoint.prism.impl.lex.LexicalUtils;
import com.evolveum.midpoint.prism.impl.xnode.*;
import com.evolveum.midpoint.prism.marshaller.XNodeProcessorEvaluationMode;
import com.evolveum.midpoint.prism.schema.SchemaRegistry;
import com.evolveum.midpoint.prism.xnode.*;
import com.evolveum.midpoint.prism.xnode.MapXNode;
import com.evolveum.midpoint.prism.xnode.RootXNode;
import com.evolveum.midpoint.prism.xnode.XNode;
import com.evolveum.midpoint.util.DOMUtil;
import com.evolveum.midpoint.util.QNameUtil;
import com.evolveum.midpoint.util.exception.SchemaException;
import com.evolveum.midpoint.util.exception.SystemException;
import com.evolveum.midpoint.util.logging.Trace;
import com.evolveum.midpoint.util.logging.TraceManager;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang.Validate;
import org.codehaus.staxmate.dom.DOMConverter;
import org.jetbrains.annotations.Contract;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DomLexicalProcessor implements LexicalProcessor<String> {

Expand Down Expand Up @@ -98,8 +101,10 @@ private XMLInputFactory getXMLInputFactory() {

// code taken from Validator class
@Override
public void readObjectsIteratively(@NotNull ParserSource source, @NotNull ParsingContext parsingContext,
RootXNodeHandler handler) throws SchemaException, IOException {
public void readObjectsIteratively(@NotNull ParserSource source,
@NotNull ParsingContext parsingContext, RootXNodeHandler handler)
throws SchemaException, IOException {

InputStream is = source.getInputStream();
XMLStreamReader stream = null;
try {
Expand Down Expand Up @@ -131,24 +136,27 @@ public void readObjectsIteratively(@NotNull ParserSource source, @NotNull Parsin
String lineInfo = stream != null
? " on line " + stream.getLocation().getLineNumber()
: "";
throw new SchemaException("Exception while parsing XML" + lineInfo + ": " + ex.getMessage(), ex);
throw new SchemaException(
"Exception while parsing XML" + lineInfo + ": " + ex.getMessage(), ex);
} finally {
if (source.closeStreamAfterParsing()) {
IOUtils.closeQuietly(is);
}
}
}

// NOTE(review): diff-view residue - the two-line signature below appears to be the pre-commit
// wrapping and the four-line signature after it the re-wrapped form; only one belongs in the real file.
/**
 * Reads a single object from the XML stream and passes it to the handler.
 * <p>
 * A DOM document is built with {@code domConverter.buildDocument(stream)}; its first child
 * element gets the supplied root namespace declarations, is parsed with {@code read(...)} and
 * the resulting root node is handed to {@code handler.handleData(...)}.
 *
 * @param stream XML stream reader the DOM document is built from
 * @param rootNamespaceDeclarations namespace declarations set on the object element before it is parsed
 * @param domConverter converter used to build the DOM document from the stream
 * @param handler receives the parsed root node
 * @return the value returned by {@code handler.handleData(...)}
 *         (presumably whether reading should continue - TODO confirm against RootXNodeHandler)
 * @throws XMLStreamException declared; presumably raised while building the document from the stream
 * @throws SchemaException declared; presumably raised while parsing the object element
 */
private boolean readSingleObjectIteratively(XMLStreamReader stream, Map<String, String> rootNamespaceDeclarations,
DOMConverter domConverter, RootXNodeHandler handler) throws XMLStreamException, SchemaException {
private boolean readSingleObjectIteratively(
XMLStreamReader stream, Map<String, String> rootNamespaceDeclarations,
DOMConverter domConverter, RootXNodeHandler handler)
throws XMLStreamException, SchemaException {
// Materialize the object as a DOM document and take its first child element.
Document objectDoc = domConverter.buildDocument(stream);
Element objectElement = DOMUtil.getFirstChildElement(objectDoc);
// Apply the supplied root namespace declarations to the object element before parsing it.
DOMUtil.setNamespaceDeclarations(objectElement, rootNamespaceDeclarations);
RootXNodeImpl rootNode = read(objectElement);
return handler.handleData(rootNode);
}

private List<RootXNodeImpl> readObjects(Document document) throws SchemaException{
private List<RootXNodeImpl> readObjects(Document document) throws SchemaException {
Element root = DOMUtil.getFirstChildElement(document);
QName objectsMarker = schemaRegistry.getPrismContext().getObjectsElementName();
if (objectsMarker != null && !QNameUtil.match(DOMUtil.getQName(root), objectsMarker)) {
Expand Down Expand Up @@ -180,7 +188,9 @@ public RootXNodeImpl read(Element rootElement) throws SchemaException {
return xroot;
}

private void extractCommonMetadata(Element element, QName xsiType, XNodeImpl xnode) throws SchemaException {
private void extractCommonMetadata(Element element, QName xsiType, XNodeImpl xnode)
throws SchemaException {

if (xsiType != null) {
xnode.setTypeQName(xsiType);
xnode.setExplicitTypeDeclaration(true);
Expand Down Expand Up @@ -254,7 +264,7 @@ private MapXNodeImpl parseElementContentToMap(Element element) throws SchemaExce
MapXNodeImpl xmap = new MapXNodeImpl();

// Attributes
for (Attr attr: DOMUtil.listApplicationAttributes(element)) {
for (Attr attr : DOMUtil.listApplicationAttributes(element)) {
QName attrQName = DOMUtil.getQName(attr);
XNodeImpl subnode = parseAttributeValue(attr);
xmap.put(attrQName, subnode);
Expand All @@ -263,7 +273,7 @@ private MapXNodeImpl parseElementContentToMap(Element element) throws SchemaExce
// Sub-elements
QName lastElementName = null;
List<Element> lastElements = null;
for (Element childElement: DOMUtil.listChildElements(element)) {
for (Element childElement : DOMUtil.listChildElements(element)) {
QName childName = DOMUtil.getQName(childElement);
if (!match(childName, lastElementName)) {
parseSubElementsGroupAsMapEntry(xmap, lastElementName, lastElements);
Expand Down Expand Up @@ -315,7 +325,7 @@ private boolean elementsAreCompatible(List<Element> elements) {
}

private QName getHierarchyRoot(QName name) {
ItemDefinition def = schemaRegistry.findItemDefinitionByElementName(name);
ItemDefinition<?> def = schemaRegistry.findItemDefinitionByElementName(name);
if (def == null || !def.isHeterogeneousListItem()) {
return name;
} else {
Expand Down Expand Up @@ -355,7 +365,7 @@ private void parseSubElementsGroupAsMapEntry(MapXNodeImpl xmap, QName elementNam

/**
* Parses elements that should form the list.
*
* <p>
* Either they have the same element name, or they are stored as sub-elements of a "list" parent element.
*/
@NotNull
Expand All @@ -365,7 +375,7 @@ private ListXNodeImpl parseElementList(List<Element> elements, QName elementName
throw new IllegalArgumentException("When !storeElementNames the element name must be specified");
}
ListXNodeImpl xlist = new ListXNodeImpl();
for (Element element: elements) {
for (Element element : elements) {
xlist.add(parseElementContent(element, elementName, storeElementNames));
}
return xlist;
Expand Down Expand Up @@ -408,13 +418,14 @@ public boolean canRead(@NotNull File file) {
return file.getName().endsWith(".xml");
}

/**
 * Heuristic pattern for detecting XML content: anchored at the very beginning of the input
 * ({@code \A}), it accepts any leading whitespace followed by {@code <} and at least one
 * word character. Held as a constant so it is compiled only once.
 */
private static final Pattern XML_DETECTION_PATTERN = Pattern.compile("\\A\\s*<\\w+");

/**
 * Tells whether the given string looks like XML.
 *
 * @param dataString text to inspect
 * @return {@code true} if the text starts with {@code <?xml} or if
 *         {@code XML_DETECTION_PATTERN} finds a match in it
 */
@Override
public boolean canRead(@NotNull String dataString) {
if (dataString.startsWith("<?xml")) {
return true;
} else {
// NOTE(review): diff-view residue - the next two lines appear to be the pre-commit form
// (pattern compiled on every call, reluctant whitespace quantifier); the
// XML_DETECTION_PATTERN line after them is the replacement. Only one variant belongs in the real file.
Pattern p = Pattern.compile("\\A\\s*?<\\w+");
Matcher m = p.matcher(dataString);
Matcher m = XML_DETECTION_PATTERN.matcher(dataString);
// The pattern itself is anchored with \A, so find() can only match at the start of the input.
return m.find();
}
}
Expand Down Expand Up @@ -474,18 +485,18 @@ private Element serializeToElement(XNodeImpl xnode, QName elementName) throws Sc
} else if (xnode instanceof PrimitiveXNodeImpl<?>) {
return serializeXPrimitiveToElement((PrimitiveXNodeImpl<?>) xnode, elementName);
} else if (xnode instanceof RootXNodeImpl) {
return writeXRootToElement((RootXNodeImpl)xnode);
return writeXRootToElement((RootXNodeImpl) xnode);
} else if (xnode instanceof ListXNodeImpl) {
ListXNodeImpl xlist = (ListXNodeImpl) xnode;
if (xlist.size() == 0) {
return null;
} else if (xlist.size() > 1) {
throw new IllegalArgumentException("Cannot serialize list xnode with more than one item: "+xlist);
throw new IllegalArgumentException("Cannot serialize list xnode with more than one item: " + xlist);
} else {
return serializeToElement(xlist.get(0), elementName);
}
} else {
throw new IllegalArgumentException("Cannot serialize "+xnode+" to element");
throw new IllegalArgumentException("Cannot serialize " + xnode + " to element");
}
}

Expand All @@ -498,6 +509,4 @@ public Element serializeSingleElementMapToElement(MapXNode map) throws SchemaExc
Element parent = serializeToElement(xmap, subEntry.getKey());
return DOMUtil.getFirstChildElement(parent);
}


}

0 comments on commit e16d813

Please sign in to comment.