Skip to content

Commit

Permalink
Check if valid FpML document (#1982)
Browse files Browse the repository at this point in the history
* Check if valid FpML document

Provide method to check if document is FpML
Build on top of filtered parse method in `XmlFile`

* Updates from PR
  • Loading branch information
jodastephen committed May 29, 2019
1 parent 90f3e1e commit bcfb572
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 17 deletions.
Expand Up @@ -9,6 +9,7 @@
import java.io.UncheckedIOException;
import java.util.HashMap;
import java.util.Map;
import java.util.function.ToIntFunction;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
Expand Down Expand Up @@ -101,6 +102,36 @@ public static XmlFile of(ByteSource source, String refAttrName) {
});
}

/**
 * Parses the element names and structure from the specified XML, filtering to reduce memory usage.
 * <p>
 * The byte source is read as an XML document, with only the element names and nesting retained.
 * The filter function is given an element name and returns how many levels of children
 * beneath that element are kept in the result.
 * Returning {@code Integer.MAX_VALUE} applies no limit, returning 1 keeps only the
 * immediate children, and returning 0 drops the children.
 * A negative result from the filter function is treated as zero.
 *
 * @param source  the XML source data
 * @param filterFn  the filter function, mapping an element name to the number of child levels to keep
 * @return the root element of the parsed structure
 * @throws UncheckedIOException if an IO exception occurs
 * @throws IllegalArgumentException if the file cannot be parsed
 */
public static XmlElement parseElements(ByteSource source, ToIntFunction<String> filterFn) {
  ArgChecker.notNull(source, "source");
  ArgChecker.notNull(filterFn, "filterFn");
  // negative answers from the caller's function are treated as "keep no children"
  ToIntFunction<String> nonNegativeFn = elementName -> {
    int levels = filterFn.applyAsInt(elementName);
    return Math.max(levels, 0);
  };
  return Unchecked.wrap(() -> {
    try (InputStream stream = source.openBufferedStream()) {
      XMLStreamReader staxReader = xmlInputFactory().createXMLStreamReader(stream);
      try {
        // Integer.MAX_VALUE means no level limit is active when parsing starts
        return parseElements(staxReader, nonNegativeFn, Integer.MAX_VALUE);
      } finally {
        // the StAX reader is closed explicitly in this finally block
        staxReader.close();
      }
    }
  });
}

//-------------------------------------------------------------------------
/**
* Parses the tree from the StAX stream reader, capturing references.
Expand Down Expand Up @@ -159,6 +190,36 @@ private static XmlElement parse(XMLStreamReader reader, String refAttr, Map<Stri
}
}

// parses the element structure from the input, filtering as necessary
// a currentLevel of Integer.MAX_VALUE means no limit is active, so the filter function is consulted;
// any other value is the number of levels still to keep, reduced by one for each level descended
private static XmlElement parseElements(XMLStreamReader reader, ToIntFunction<String> filterFn, int currentLevel) {
  try {
    // locate the start element and read its name
    String name = parseElementName(reader);

    // walk the events up to the matching end element, recursing into each child element
    ImmutableList.Builder<XmlElement> retained = ImmutableList.builder();
    for (int event = reader.next(); event != XMLStreamConstants.END_ELEMENT; event = reader.next()) {
      if (event != XMLStreamConstants.START_ELEMENT) {
        // only nested elements are captured, all other events are passed over
        continue;
      }
      int childLevel;
      if (currentLevel == Integer.MAX_VALUE) {
        childLevel = filterFn.applyAsInt(name);
      } else {
        childLevel = currentLevel - 1;
      }
      // the child is always parsed so that the reader advances past it,
      // but it is only kept when it falls within the permitted number of levels
      XmlElement child = parseElements(reader, filterFn, childLevel);
      if (childLevel > 0) {
        retained.add(child);
      }
    }

    // an element with no retained children is represented as empty content
    ImmutableList<XmlElement> children = retained.build();
    if (children.isEmpty()) {
      return XmlElement.ofContent(name, "");
    }
    return XmlElement.ofChildren(name, children);

  } catch (XMLStreamException ex) {
    throw new IllegalArgumentException(ex);
  }
}

// find the start element and parses the name
private static String parseElementName(XMLStreamReader reader) throws XMLStreamException {
int event = reader.getEventType();
Expand Down
Expand Up @@ -37,6 +37,7 @@ public class XmlFileTest {
" <leaf1>l<![CDATA[e]]>af</leaf1>" +
" <leaf2>a<!-- comment ignored --></leaf2>" +
" <leaf2>b</leaf2>" +
" <obj><leaf3>c</leaf3></obj>" +
" </test>" +
"</base>";
private static final String SAMPLE_MISMATCHED_TAGS = "" +
Expand All @@ -59,10 +60,12 @@ public class XmlFileTest {

private static final Map<String, String> ATTR_MAP_EMPTY = ImmutableMap.of();
private static final Map<String, String> ATTR_MAP = ImmutableMap.of("key", "value", "og", "strata");
private static final XmlElement LEAF1 = XmlElement.ofContent("leaf1", ATTR_MAP_EMPTY, "leaf");
private static final XmlElement LEAF2A = XmlElement.ofContent("leaf2", ATTR_MAP_EMPTY, "a");
private static final XmlElement LEAF2B = XmlElement.ofContent("leaf2", ATTR_MAP_EMPTY, "b");
private static final List<XmlElement> CHILD_LIST_MULTI = ImmutableList.of(LEAF1, LEAF2A, LEAF2B);
// expected elements mirroring the children of <test> in SAMPLE
// leaf1 is written in SAMPLE as text plus a CDATA section, giving the content "leaf"
private static final XmlElement LEAF1 = XmlElement.ofContent("leaf1", "leaf");
// leaf2 occurs twice in SAMPLE; the first occurrence also holds an XML comment
private static final XmlElement LEAF2A = XmlElement.ofContent("leaf2", "a");
private static final XmlElement LEAF2B = XmlElement.ofContent("leaf2", "b");
// obj is the only child of <test> that has a nested element, leaf3
private static final XmlElement LEAF3 = XmlElement.ofContent("leaf3", "c");
private static final XmlElement OBJ = XmlElement.ofChildren("obj", ImmutableList.of(LEAF3));
// the four children in the same order as they appear in SAMPLE
private static final List<XmlElement> CHILD_LIST_MULTI = ImmutableList.of(LEAF1, LEAF2A, LEAF2B, OBJ);
private static final Object ANOTHER_TYPE = "";

//-------------------------------------------------------------------------
Expand Down Expand Up @@ -131,6 +134,67 @@ public void test_of_ByteSource_parsedReferences_ioException() {
assertThrows(() -> XmlFile.of(source, "key"), UncheckedIOException.class);
}

//-------------------------------------------------------------------------
public void test_parseElements_ByteSource_Fn_noFilter() {
  // with no level limit the whole structure is expected, with every leaf having empty content
  XmlElement expectedLeaf3 = XmlElement.ofContent("leaf3", "");
  XmlElement expectedObj = XmlElement.ofChildren("obj", ImmutableList.of(expectedLeaf3));
  List<XmlElement> expectedChildren = ImmutableList.of(
      XmlElement.ofContent("leaf1", ""),
      XmlElement.ofContent("leaf2", ""),
      XmlElement.ofContent("leaf2", ""),
      expectedObj);

  byte[] xmlBytes = SAMPLE.getBytes(StandardCharsets.UTF_8);
  XmlElement parsed = XmlFile.parseElements(ByteSource.wrap(xmlBytes), name -> Integer.MAX_VALUE);

  // the root element
  assertEquals(parsed.getName(), "base");
  assertEquals(parsed.getAttributes(), ATTR_MAP_EMPTY);
  assertEquals(parsed.getContent(), "");
  assertEquals(parsed.getChildren().size(), 1);
  // the single child of the root
  XmlElement testElement = parsed.getChild(0);
  assertEquals(testElement, XmlElement.ofChildren("test", expectedChildren));
}

public void test_parseElements_ByteSource_Fn_filterAll() {
  // a level of zero for "test" removes all of its children from the result
  byte[] xmlBytes = SAMPLE.getBytes(StandardCharsets.UTF_8);
  XmlElement parsed = XmlFile.parseElements(
      ByteSource.wrap(xmlBytes),
      name -> name.equals("test") ? 0 : Integer.MAX_VALUE);

  // the root element
  assertEquals(parsed.getName(), "base");
  assertEquals(parsed.getAttributes(), ATTR_MAP_EMPTY);
  assertEquals(parsed.getContent(), "");
  assertEquals(parsed.getChildren().size(), 1);
  // the "test" element is present, but empty
  XmlElement testElement = parsed.getChild(0);
  assertEquals(testElement, XmlElement.ofContent("test", ""));
}

public void test_parseElements_ByteSource_Fn_filterOneLevel() {
  // a level of one for "test" keeps its immediate children only, so "obj" loses its nested "leaf3"
  List<XmlElement> expectedChildren = ImmutableList.of(
      XmlElement.ofContent("leaf1", ""),
      XmlElement.ofContent("leaf2", ""),
      XmlElement.ofContent("leaf2", ""),
      XmlElement.ofContent("obj", ""));

  byte[] xmlBytes = SAMPLE.getBytes(StandardCharsets.UTF_8);
  XmlElement parsed = XmlFile.parseElements(
      ByteSource.wrap(xmlBytes),
      name -> name.equals("test") ? 1 : Integer.MAX_VALUE);

  // the root element
  assertEquals(parsed.getName(), "base");
  assertEquals(parsed.getAttributes(), ATTR_MAP_EMPTY);
  assertEquals(parsed.getContent(), "");
  assertEquals(parsed.getChildren().size(), 1);
  // the "test" element with one level of children
  XmlElement testElement = parsed.getChild(0);
  assertEquals(testElement, XmlElement.ofChildren("test", expectedChildren));
}

public void test_parseElements_ByteSource_Fn_mismatchedTags() {
  // parsing the mismatched-tags sample must be reported as an illegal argument
  byte[] xmlBytes = SAMPLE_MISMATCHED_TAGS.getBytes(StandardCharsets.UTF_8);
  ByteSource malformed = ByteSource.wrap(xmlBytes);
  assertThrowsIllegalArg(() -> XmlFile.parseElements(malformed, name -> Integer.MAX_VALUE));
}

public void test_parseElements_ByteSource_Fn_badEnd() {
  // parsing the bad-end sample must be reported as an illegal argument
  byte[] xmlBytes = SAMPLE_BAD_END.getBytes(StandardCharsets.UTF_8);
  ByteSource malformed = ByteSource.wrap(xmlBytes);
  assertThrowsIllegalArg(() -> XmlFile.parseElements(malformed, name -> Integer.MAX_VALUE));
}

public void test_parseElements_ByteSource_Fn_ioException() {
  // a source backed by a path that is expected not to exist must surface as an unchecked IO exception
  File missingFile = new File("/oh-dear-no-such-file");
  ByteSource unreadable = Files.asByteSource(missingFile);
  assertThrows(
      () -> XmlFile.parseElements(unreadable, name -> Integer.MAX_VALUE),
      UncheckedIOException.class);
}

//-------------------------------------------------------------------------
public void test_equalsHashCodeToString() {
ByteSource source = ByteSource.wrap(SAMPLE.getBytes(StandardCharsets.UTF_8));
Expand Down
Expand Up @@ -7,6 +7,7 @@

import static com.opengamma.strata.collect.Guavate.toImmutableSet;

import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -103,6 +104,31 @@ public static FpmlDocumentParser of(
return of(ourPartySelector, tradeInfoParser, FpmlParserPlugin.extendedEnum().lookupAllNormalized());
}

/**
 * Obtains an instance of the parser, based on the specified selector, trade info plugin and reference data.
 * <p>
 * The FpML parser can be controlled through a number of plugin points:
 * <ul>
 * <li>the {@linkplain FpmlPartySelector party selector}
 * <li>the {@linkplain FpmlTradeInfoParserPlugin trade info parser}
 * <li>the {@linkplain FpmlParserPlugin trade parsers}
 * <li>the {@linkplain ReferenceData reference data}
 * </ul>
 * The trade parsers are not passed in here.
 * Instead, this method uses those registered in {@link FpmlParserPlugin} configuration.
 *
 * @param ourPartySelector  the selector used to find "our" party within the set of parties in the FpML document
 * @param tradeInfoParser  the trade info parser
 * @param refData  the reference data to use
 * @return the document parser
 */
public static FpmlDocumentParser of(
    FpmlPartySelector ourPartySelector,
    FpmlTradeInfoParserPlugin tradeInfoParser,
    ReferenceData refData) {

  // delegate to the factory that accepts trade parsers, supplying those looked up from the extended enum
  return of(
      ourPartySelector,
      tradeInfoParser,
      FpmlParserPlugin.extendedEnum().lookupAllNormalized(),
      refData);
}

/**
* Obtains an instance of the parser, based on the specified selector and plugins.
* <p>
Expand Down Expand Up @@ -175,14 +201,43 @@ private FpmlDocumentParser(
}

//-------------------------------------------------------------------------
/**
 * Checks if the source can be parsed as FpML.
 * <p>
 * The specified byte source is parsed to determine whether it appears to be FpML.
 * Only the element structure is examined, and nothing below the
 * {@code <trade>} and {@code <party>} elements is retained.
 * <p>
 * Sometimes, the FpML document is embedded in a non-FpML wrapper.
 * This method will intelligently find the FpML document at the root, within any children of
 * the root, or within any grand-children of the root.
 * The FpML root element is the one that contains both {@code <trade>} and {@code <party>}.
 * <p>
 * Any failure other than an IO error results in {@code false}.
 *
 * @param source  the source to check
 * @return true if the source appears to be FpML
 * @throws UncheckedIOException if an IO error occurred
 */
public boolean isKnownFormat(ByteSource source) {
  try {
    // the children of <party> and <trade> are not needed to recognize the document
    XmlElement structure = XmlFile.parseElements(
        source,
        elementName -> {
          boolean stopHere = elementName.equals("party") || elementName.equals("trade");
          return stopHere ? 0 : Integer.MAX_VALUE;
        });
    return findFpmlRoot(structure) != null;
  } catch (UncheckedIOException ioEx) {
    // IO failures are reported to the caller rather than being treated as "not FpML"
    throw ioEx;
  } catch (RuntimeException ignored) {
    // deliberately swallowed: any other failure means the source is not recognized as FpML
    return false;
  }
}

/**
* Parses FpML from the specified source, extracting the trades.
* <p>
* This parses the specified byte source which must be an XML document.
* <p>
* Sometimes, the FpML document is embedded in a non-FpML wrapper.
* This method will intelligently find the FpML document at the root or within one or two levels
* of wrapper by searching for an element that contains both {@code <trade>} and {@code <party>}.
* This method will intelligently find the FpML document at the root, within any children of
* the root, or within any grand-children of the root.
* The FpML root element is the one that contains both {@code <trade>} and {@code <party>}.
*
* @param source the source of the FpML XML document
* @return the parsed trades
Expand All @@ -191,6 +246,9 @@ private FpmlDocumentParser(
public List<Trade> parseTrades(ByteSource source) {
  // parse the document, using the FpML id attribute name as the reference attribute
  XmlFile parsedFile = XmlFile.of(source, FpmlDocument.ID);
  // the FpML document may be nested inside a wrapper, so search for the real root
  XmlElement fpmlRoot = findFpmlRoot(parsedFile.getRoot());
  if (fpmlRoot != null) {
    return parseTrades(fpmlRoot, parsedFile.getReferences());
  }
  throw new FpmlParseException("Unable to find FpML root element");
}

Expand All @@ -216,7 +274,7 @@ private static XmlElement findFpmlRoot(XmlElement root) {
}
}
}
throw new FpmlParseException("Unable to find FpML root element");
return null;
}

// simple check to see if this is an FpML root
Expand Down

0 comments on commit bcfb572

Please sign in to comment.