-
-
Notifications
You must be signed in to change notification settings - Fork 22
/
StackExchange7zXmlEventReaderSource.java
58 lines (47 loc) · 1.87 KB
/
StackExchange7zXmlEventReaderSource.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
package nu.marginalia.integration.stackexchange.xml;
import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
import org.apache.commons.compress.archivers.sevenz.SevenZFile;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Path;
/**
 * An {@link XmlEventReaderSource} that reads a single named XML file out of a
 * 7z archive.
 *
 * <p>The instance owns both the archive handle and the {@link XMLEventReader}
 * created on top of it; both are released by {@link #close()}.
 */
public class StackExchange7zXmlEventReaderSource implements XmlEventReaderSource {

    static {
        // The JDK's XML processing limits cap the total size of entities a
        // parser will accept.  The documents read here can exceed that cap
        // (the original note mentions more than 50,000,000 entities), so the
        // limit is lifted by setting the property to "0".  This is done
        // through a system property because the limit is not otherwise
        // configurable from code when using StAX.
        System.setProperty("jdk.xml.totalEntitySizeLimit", "0");
    }

    private final XMLEventReader reader;
    private final SevenZFile postsFile;

    /**
     * Opens the archive and positions an XML event reader on the requested entry.
     *
     * <p>If anything fails after the archive has been opened, the archive is
     * closed again before the exception propagates, so a failed construction
     * does not leak the underlying file handle.
     *
     * @param pathTo7zFile path to the 7z archive
     * @param xmlFileName  exact name of the archive entry to read
     * @throws java.io.FileNotFoundException if the archive has no entry with that name
     * @throws IOException        if the archive cannot be opened or read
     * @throws XMLStreamException if the XML event reader cannot be created
     */
    public StackExchange7zXmlEventReaderSource(Path pathTo7zFile, String xmlFileName)
            throws IOException, XMLStreamException
    {
        postsFile = new SevenZFile(pathTo7zFile.toFile());

        try {
            reader = openReader(postsFile, xmlFileName);
        }
        catch (IOException | XMLStreamException | RuntimeException e) {
            // The caller never receives an instance it could close(), so the
            // archive must be released here on every failure path.
            try {
                postsFile.close();
            }
            catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /** Locates the named entry in the archive and wraps its stream in an XML event reader. */
    private static XMLEventReader openReader(SevenZFile archive, String xmlFileName)
            throws IOException, XMLStreamException
    {
        SevenZArchiveEntry postsEntry = null;

        // The first entry whose name matches exactly wins.
        for (SevenZArchiveEntry entry : archive.getEntries()) {
            if (xmlFileName.equals(entry.getName())) {
                postsEntry = entry;
                break;
            }
        }

        if (postsEntry == null) {
            throw new FileNotFoundException("No " + xmlFileName + " in provided archive");
        }

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        return xmlInputFactory.createXMLEventReader(archive.getInputStream(postsEntry));
    }

    /** Returns the event reader positioned on the selected archive entry. */
    @Override
    public XMLEventReader reader() {
        return reader;
    }

    /**
     * Closes the XML event reader and then the archive.
     *
     * <p>The archive is closed even if closing the reader throws; in that case
     * a failure to close the archive is attached as a suppressed exception.
     */
    @Override
    public void close() throws Exception {
        try (SevenZFile archive = postsFile) {
            reader.close();
        }
    }
}