package com.demo.flatpack;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;

import net.sf.flatpack.util.FPConstants;

/**
 * Improved delimited content parser that resolves issues found in the DelimiterParser class
 * in flatpack version 4.0.4.
 * Parses content according to https://tools.ietf.org/html/rfc4180
 *
 * <p>Every constructor delegates to the superclass constructor with the same argument list and
 * then calls {@link #configure()}.
 */
public class DelimiterParser extends net.sf.flatpack.DelimiterParser {

    /** Line separator used to re-join the physical lines of a multi-line record. */
    private static final String LINE_BREAK = System.lineSeparator();

    /** Number of records returned so far by {@link #fetchNextRecord}. */
    private int mLineCount = 0;

    /**
     * Constructor.
     *
     * @param aPZMapXML mapping file, passed to the superclass unchanged
     * @param aDataSource data file, passed to the superclass unchanged
     * @param aDelimiter field delimiter character
     * @param aQualifier text qualifier character
     * @param aIgnoreFirstRecord passed to the superclass unchanged
     */
    public DelimiterParser(File aPZMapXML, File aDataSource, char aDelimiter, char aQualifier,
            boolean aIgnoreFirstRecord) {
        super(aPZMapXML, aDataSource, aDelimiter, aQualifier, aIgnoreFirstRecord);
        configure();
    }

    /**
     * Constructor.
     *
     * @param aPZMapXMLStream mapping stream, passed to the superclass unchanged
     * @param aDataSourceStream data stream, passed to the superclass unchanged
     * @param aDelimiter field delimiter character
     * @param aQualifier text qualifier character
     * @param aIgnoreFirstRecord passed to the superclass unchanged
     */
    public DelimiterParser(InputStream aPZMapXMLStream, InputStream aDataSourceStream, char aDelimiter,
            char aQualifier, boolean aIgnoreFirstRecord) {
        super(aPZMapXMLStream, aDataSourceStream, aDelimiter, aQualifier, aIgnoreFirstRecord);
        configure();
    }

    /**
     * Constructor.
     *
     * @param aDataSource data file, passed to the superclass unchanged
     * @param aDelimiter field delimiter character
     * @param aQualifier text qualifier character
     * @param aIgnoreFirstRecord passed to the superclass unchanged
     */
    public DelimiterParser(File aDataSource, char aDelimiter, char aQualifier, boolean aIgnoreFirstRecord) {
        super(aDataSource, aDelimiter, aQualifier, aIgnoreFirstRecord);
        configure();
    }

    /**
     * Constructor.
     *
     * @param aDataSourceStream data stream, passed to the superclass unchanged
     * @param aDelimiter field delimiter character
     * @param aQualifier text qualifier character
     * @param aIgnoreFirstRecord passed to the superclass unchanged
     */
    public DelimiterParser(InputStream aDataSourceStream, char aDelimiter, char aQualifier,
            boolean aIgnoreFirstRecord) {
        super(aDataSourceStream, aDelimiter, aQualifier, aIgnoreFirstRecord);
        configure();
    }

    /**
     * Constructor.
     *
     * @param aDataSourceReader data reader, passed to the superclass unchanged
     * @param aDelimiter field delimiter character
     * @param aQualifier text qualifier character
     * @param aIgnoreFirstRecord passed to the superclass unchanged
     */
    public DelimiterParser(Reader aDataSourceReader, char aDelimiter, char aQualifier,
            boolean aIgnoreFirstRecord) {
        super(aDataSourceReader, aDelimiter, aQualifier, aIgnoreFirstRecord);
        configure();
    }

    /**
     * Constructor.
     *
     * @param aDataSourceReader data reader, passed to the superclass unchanged
     * @param aPZMapReader mapping reader, passed to the superclass unchanged
     * @param aDelimiter field delimiter character
     * @param aQualifier text qualifier character
     * @param aIgnoreFirstRecord passed to the superclass unchanged
     */
    public DelimiterParser(Reader aDataSourceReader, Reader aPZMapReader, char aDelimiter, char aQualifier,
            boolean aIgnoreFirstRecord) {
        super(aDataSourceReader, aPZMapReader, aDelimiter, aQualifier, aIgnoreFirstRecord);
        configure();
    }

    /**
     * Configure parser: leading whitespace is preserved, trailing whitespace is not.
     *
     * <p>NOTE(review): this overridable method is invoked from every constructor, so an override
     * runs before the subclass's own fields are initialised.
     */
    protected void configure() {
        setPreserveLeadingWhitespace(true);
        // Trailing whitespace ideally should be preserved as well, but it is left disabled
        // for better backward compatibility.
        setPreserveTrailingWhitespace(false);
    }

    /**
     * Returns the number of records fetched so far.
     *
     * @return count of records returned by {@link #fetchNextRecord}
     */
    @Override
    protected int getLineCount() {
        return mLineCount;
    }

    /**
     * Improved version of line fetching that solves some of the issues of flatpack parser.
     *
     * <p>Reads one logical record. When a qualifier is defined, physical lines are consumed and
     * joined with the platform line separator for as long as {@link #isMultiline} reports an
     * unterminated qualified field.
     *
     * @param aContentReader reader positioned at the start of the next record
     * @param aQualifier text qualifier, or {@link FPConstants#NO_QUALIFIER} when none is used
     * @param aDelimiter field delimiter
     * @return the record without a trailing line break, or {@code null} when the reader is exhausted
     * @throws IOException if reading from {@code aContentReader} fails
     */
    @Override
    protected String fetchNextRecord(BufferedReader aContentReader, char aQualifier, char aDelimiter)
            throws IOException {
        if (aQualifier == FPConstants.NO_QUALIFIER) {
            // no qualifier defined, then there can't be line breaks in the line
            String record = aContentReader.readLine();
            if (record != null) {
                // keep the record counter consistent with the qualified path below
                mLineCount++;
            }
            return record;
        }

        StringBuilder lineData = null;
        boolean multiline = false;
        String line;
        // consuming lines until we find end of the data row
        while ((line = aContentReader.readLine()) != null) {
            if (lineData == null) {
                lineData = new StringBuilder(line);
            } else {
                lineData.append(LINE_BREAK).append(line);
            }
            multiline = isMultiline(line.toCharArray(), multiline, aQualifier, aDelimiter);
            if (!multiline) {
                // data row ended
                break;
            }
        }

        if (lineData == null) {
            return null;
        }

        mLineCount++;
        String result = lineData.toString();
        // no line break character at the end of data row
        return result.endsWith(LINE_BREAK)
                ? result.substring(0, result.length() - LINE_BREAK.length())
                : result;
    }

    /**
     * Checks if we need to consume one more line because data row was split across multiple lines.
     *
     * <p>The line is not trimmed; according to rfc4180 spaces are considered part of a field and
     * should not be ignored. A qualifier only opens a qualified field when it is the first
     * character of that field. Inside a qualified field a doubled qualifier is an escaped
     * qualifier (i.e. {@code "some ""data"" here"}). An empty line leaves the state unchanged.
     *
     * @param aChrArray characters of one physical line, without the line terminator
     * @param aMultiline {@code true} if a qualified field opened on a previous line is still open
     * @param aQualifier text qualifier character
     * @param aDelimiter field delimiter character
     * @return {@code true} if a qualified field is still open at the end of this line
     */
    protected boolean isMultiline(char[] aChrArray, boolean aMultiline, char aQualifier, char aDelimiter) {
        final int length = aChrArray.length;
        int position = 0;
        boolean insideQualified = aMultiline;

        while (position < length) {
            if (!insideQualified) {
                char current = aChrArray[position];
                if (current == aDelimiter) {
                    // empty field, or the delimiter that follows a closing qualifier
                    position++;
                    continue;
                }
                if (current != aQualifier) {
                    // unqualified field: it cannot contain a line break, so just move to the
                    // start of the next field
                    position = indexAfterNextDelimiter(aChrArray, position + 1, aDelimiter);
                    continue;
                }
                // opening qualifier: the field text starts at the next char
                position++;
                insideQualified = true;
            }

            // Newline won't be present in the line because it is removed by readLine(),
            // so a qualified field that is not closed on this line continues on the next one.
            int afterClosing = indexAfterClosingQualifier(aChrArray, position, aQualifier);
            if (afterClosing < 0) {
                return true;
            }
            insideQualified = false;
            position = afterClosing;
        }
        return insideQualified;
    }

    /** Returns the index just past the first delimiter at or after {@code from}, or the array length. */
    private static int indexAfterNextDelimiter(char[] aChars, int from, char aDelimiter) {
        for (int i = from; i < aChars.length; i++) {
            if (aChars[i] == aDelimiter) {
                return i + 1;
            }
        }
        return aChars.length;
    }

    /** Returns the index just past the first unescaped qualifier at or after {@code from}, or -1. */
    private static int indexAfterClosingQualifier(char[] aChars, int from, char aQualifier) {
        int i = from;
        while (i < aChars.length) {
            if (aChars[i] != aQualifier) {
                i++;
            } else if (i + 1 < aChars.length && aChars[i + 1] == aQualifier) {
                // skipping escaped qualifier like ""
                i += 2;
            } else {
                return i + 1;
            }
        }
        return -1;
    }
}