From d4c0d965bb8af93a58e21f9c3a95e1468b6b0441 Mon Sep 17 00:00:00 2001 From: chenson42 Date: Sun, 17 Apr 2011 00:35:46 +0000 Subject: [PATCH] --- future/symmetric3-csv/.classpath | 6 + future/symmetric3-csv/.project | 30 + future/symmetric3-csv/pom.xml | 37 + .../org/jumpmind/symmetric/csv/CsvReader.java | 1720 +++++++++++++++++ .../org/jumpmind/symmetric/csv/CsvWriter.java | 595 ++++++ 5 files changed, 2388 insertions(+) create mode 100644 future/symmetric3-csv/.classpath create mode 100644 future/symmetric3-csv/.project create mode 100644 future/symmetric3-csv/pom.xml create mode 100644 future/symmetric3-csv/src/main/java/org/jumpmind/symmetric/csv/CsvReader.java create mode 100644 future/symmetric3-csv/src/main/java/org/jumpmind/symmetric/csv/CsvWriter.java diff --git a/future/symmetric3-csv/.classpath b/future/symmetric3-csv/.classpath new file mode 100644 index 0000000000..a42d48e986 --- /dev/null +++ b/future/symmetric3-csv/.classpath @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/future/symmetric3-csv/.project b/future/symmetric3-csv/.project new file mode 100644 index 0000000000..4b6054110a --- /dev/null +++ b/future/symmetric3-csv/.project @@ -0,0 +1,30 @@ + + + symmetric3-csv + This library is a fork of a CSV library called CSVReader by Bruce Dunwiddie. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. 
+ + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.wst.common.project.facet.core.builder + + + + + org.eclipse.wst.validation.validationbuilder + + + + + + org.eclipse.wst.common.project.facet.core.nature + org.eclipse.jdt.core.javanature + org.eclipse.wst.common.modulecore.ModuleCoreNature + org.eclipse.jem.workbench.JavaEMFNature + + diff --git a/future/symmetric3-csv/pom.xml b/future/symmetric3-csv/pom.xml new file mode 100644 index 0000000000..80e8012eda --- /dev/null +++ b/future/symmetric3-csv/pom.xml @@ -0,0 +1,37 @@ + + 4.0.0 + org.jumpmind.symmetric + symmetric-csv + jar + 2.2.2-SNAPSHOT + csv + 2010 + + + This library is a fork of a CSV library called CSVReader by Bruce Dunwiddie + + + + org.jumpmind.symmetric + symmetric-parent + 2.2.2-SNAPSHOT + ../symmetric-parent/pom.xml + + + + + LGPL + http://www.gnu.org/licenses/lgpl.txt + A business-friendly OSS license + + + + + + junit + junit + test + + + + diff --git a/future/symmetric3-csv/src/main/java/org/jumpmind/symmetric/csv/CsvReader.java b/future/symmetric3-csv/src/main/java/org/jumpmind/symmetric/csv/CsvReader.java new file mode 100644 index 0000000000..28e25094e3 --- /dev/null +++ b/future/symmetric3-csv/src/main/java/org/jumpmind/symmetric/csv/CsvReader.java @@ -0,0 +1,1720 @@ +/* + * Java CSV is a stream based library for reading and writing + * CSV and other delimited data. + * + * Copyright (C) Bruce Dunwiddie bruce@csvreader.com + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ +package org.jumpmind.symmetric.csv; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.nio.charset.Charset; +import java.text.NumberFormat; +import java.util.HashMap; + +/** + * A stream based parser for parsing delimited text data from a file or a + * stream. + */ +public class CsvReader { + private Reader inputStream = null; + + private String fileName = null; + + // this holds all the values for switches that the user is allowed to set + private UserSettings userSettings = new UserSettings(); + + private Charset charset = null; + + private boolean useCustomRecordDelimiter = false; + + // this will be our working buffer to hold data chunks + // read in from the data file + + private DataBuffer dataBuffer = new DataBuffer(); + + private ColumnBuffer columnBuffer = new ColumnBuffer(); + + private RawRecordBuffer rawBuffer = new RawRecordBuffer(); + + private boolean[] isQualified = null; + + private String rawRecord = ""; + + private HeadersHolder headersHolder = new HeadersHolder(); + + // these are all more or less global loop variables + // to keep from needing to pass them all into various + // methods during parsing + + private boolean startedColumn = false; + + private boolean startedWithQualifier = false; + + private boolean hasMoreData = true; + + private char lastLetter = '\0'; + + private boolean hasReadNextLine = false; + + private int columnsCount = 0; + + private long currentRecord = 0; + + private String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT]; + + private boolean 
initialized = false; + + private boolean closed = false; + + /** + * Double up the text qualifier to represent an occurrence of the text + * qualifier. + */ + public static final int ESCAPE_MODE_DOUBLED = 1; + + /** + * Use a backslash character before the text qualifier to represent an + * occurrence of the text qualifier. + */ + public static final int ESCAPE_MODE_BACKSLASH = 2; + + /** + * Creates a {@link org.jumpmind.symmetric.csv.CsvReader CsvReader} object using a file + * as the data source. + * + * @param fileName + * The path to the file to use as the data source. + * @param delimiter + * The character to use as the column delimiter. + * @param charset + * The {@link java.nio.charset.Charset Charset} to use while + * parsing the data. + */ + public CsvReader(String fileName, char delimiter, Charset charset) throws FileNotFoundException { + if (fileName == null) { + throw new IllegalArgumentException("Parameter fileName can not be null."); + } + + if (charset == null) { + throw new IllegalArgumentException("Parameter charset can not be null."); + } + + if (!new File(fileName).exists()) { + throw new FileNotFoundException("File " + fileName + " does not exist."); + } + + this.fileName = fileName; + this.userSettings.Delimiter = delimiter; + this.charset = charset; + + isQualified = new boolean[values.length]; + } + + /** + * Creates a {@link org.jumpmind.symmetric.csv.CsvReader CsvReader} object using a file + * as the data source. Uses ISO-8859-1 as the + * {@link java.nio.charset.Charset Charset}. + * + * @param fileName + * The path to the file to use as the data source. + * @param delimiter + * The character to use as the column delimiter. + */ + public CsvReader(String fileName, char delimiter) throws FileNotFoundException { + this(fileName, delimiter, Charset.forName("ISO-8859-1")); + } + + /** + * Creates a {@link org.jumpmind.symmetric.csv.CsvReader CsvReader} object using a file + * as the data source. 
Uses a comma as the column delimiter and + * ISO-8859-1 as the {@link java.nio.charset.Charset Charset}. + * + * @param fileName + * The path to the file to use as the data source. + */ + public CsvReader(String fileName) throws FileNotFoundException { + this(fileName, Letters.COMMA); + } + + /** + * Constructs a {@link org.jumpmind.symmetric.csv.CsvReader CsvReader} object using a + * {@link java.io.Reader Reader} object as the data source. + * + * @param inputStream + * The stream to use as the data source. + * @param delimiter + * The character to use as the column delimiter. + */ + public CsvReader(Reader inputStream, char delimiter) { + if (inputStream == null) { + throw new IllegalArgumentException("Parameter inputStream can not be null."); + } + + this.inputStream = inputStream; + this.userSettings.Delimiter = delimiter; + initialized = true; + + isQualified = new boolean[values.length]; + } + + /** + * Constructs a {@link org.jumpmind.symmetric.csv.CsvReader CsvReader} object using a + * {@link java.io.Reader Reader} object as the data source. Uses a + * comma as the column delimiter. + * + * @param inputStream + * The stream to use as the data source. + */ + public CsvReader(Reader inputStream) { + this(inputStream, Letters.COMMA); + } + + /** + * Constructs a {@link org.jumpmind.symmetric.csv.CsvReader CsvReader} object using an + * {@link java.io.InputStream InputStream} object as the data source. + * + * @param inputStream + * The stream to use as the data source. + * @param delimiter + * The character to use as the column delimiter. + * @param charset + * The {@link java.nio.charset.Charset Charset} to use while + * parsing the data. + */ + public CsvReader(InputStream inputStream, char delimiter, Charset charset) { + this(new InputStreamReader(inputStream, charset), delimiter); + } + + /** + * Constructs a {@link org.jumpmind.symmetric.csv.CsvReader CsvReader} object using an + * {@link java.io.InputStream InputStream} object as the data + * source. 
Uses a comma as the column delimiter. + * + * @param inputStream + * The stream to use as the data source. + * @param charset + * The {@link java.nio.charset.Charset Charset} to use while + * parsing the data. + */ + public CsvReader(InputStream inputStream, Charset charset) { + this(new InputStreamReader(inputStream, charset)); + } + + public boolean getCaptureRawRecord() { + return userSettings.CaptureRawRecord; + } + + public void setCaptureRawRecord(boolean captureRawRecord) { + userSettings.CaptureRawRecord = captureRawRecord; + } + + public String getRawRecord() { + return rawRecord; + } + + /** + * Gets whether leading and trailing whitespace characters are being trimmed + * from non-textqualified column data. Default is true. + * + * @return Whether leading and trailing whitespace characters are being + * trimmed from non-textqualified column data. + */ + public boolean getTrimWhitespace() { + return userSettings.TrimWhitespace; + } + + /** + * Sets whether leading and trailing whitespace characters should be trimmed + * from non-textqualified column data or not. Default is true. + * + * @param trimWhitespace + * Whether leading and trailing whitespace characters should + * be trimmed from non-textqualified column data or not. + */ + public void setTrimWhitespace(boolean trimWhitespace) { + userSettings.TrimWhitespace = trimWhitespace; + } + + /** + * Gets the character being used as the column delimiter. Default is comma, + * ','. + * + * @return The character being used as the column delimiter. + */ + public char getDelimiter() { + return userSettings.Delimiter; + } + + /** + * Sets the character to use as the column delimiter. Default is comma, ','. + * + * @param delimiter + * The character to use as the column delimiter. 
+ */ + public void setDelimiter(char delimiter) { + userSettings.Delimiter = delimiter; + } + + public char getRecordDelimiter() { + return userSettings.RecordDelimiter; + } + + /** + * Sets the character to use as the record delimiter. + * + * @param recordDelimiter + * The character to use as the record delimiter. Default is + * combination of standard end of line characters for + * Windows, Unix, or Mac. + */ + public void setRecordDelimiter(char recordDelimiter) { + useCustomRecordDelimiter = true; + userSettings.RecordDelimiter = recordDelimiter; + } + + /** + * Gets the character to use as a text qualifier in the data. + * + * @return The character to use as a text qualifier in the data. + */ + public char getTextQualifier() { + return userSettings.TextQualifier; + } + + /** + * Sets the character to use as a text qualifier in the data. + * + * @param textQualifier + * The character to use as a text qualifier in the data. + */ + public void setTextQualifier(char textQualifier) { + userSettings.TextQualifier = textQualifier; + } + + /** + * Whether text qualifiers will be used while parsing or not. + * + * @return Whether text qualifiers will be used while parsing or not. + */ + public boolean getUseTextQualifier() { + return userSettings.UseTextQualifier; + } + + /** + * Sets whether text qualifiers will be used while parsing or not. + * + * @param useTextQualifier + * Whether to use a text qualifier while parsing or not. + */ + public void setUseTextQualifier(boolean useTextQualifier) { + userSettings.UseTextQualifier = useTextQualifier; + } + + /** + * Gets the character being used as a comment signal. + * + * @return The character being used as a comment signal. + */ + public char getComment() { + return userSettings.Comment; + } + + /** + * Sets the character to use as a comment signal. + * + * @param comment + * The character to use as a comment signal. 
+ */ + public void setComment(char comment) { + userSettings.Comment = comment; + } + + /** + * Gets whether comments are being looked for while parsing or not. + * + * @return Whether comments are being looked for while parsing or not. + */ + public boolean getUseComments() { + return userSettings.UseComments; + } + + /** + * Sets whether comments are being looked for while parsing or not. + * + * @param useComments + * Whether comments are being looked for while parsing or + * not. + */ + public void setUseComments(boolean useComments) { + userSettings.UseComments = useComments; + } + + /** + * Gets the current way to escape an occurrence of the text qualifier inside + * qualified data. + * + * @return The current way to escape an occurrence of the text qualifier + * inside qualified data. + */ + public int getEscapeMode() { + return userSettings.EscapeMode; + } + + /** + * Sets the current way to escape an occurrence of the text qualifier inside + * qualified data. + * + * @param escapeMode + * The way to escape an occurrence of the text qualifier + * inside qualified data. + * @exception IllegalArgumentException + * When an illegal value is specified for escapeMode. + */ + public void setEscapeMode(int escapeMode) throws IllegalArgumentException { + if (escapeMode != ESCAPE_MODE_DOUBLED && escapeMode != ESCAPE_MODE_BACKSLASH) { + throw new IllegalArgumentException("Parameter escapeMode must be a valid value."); + } + + userSettings.EscapeMode = escapeMode; + } + + public boolean getSkipEmptyRecords() { + return userSettings.SkipEmptyRecords; + } + + public void setSkipEmptyRecords(boolean skipEmptyRecords) { + userSettings.SkipEmptyRecords = skipEmptyRecords; + } + + /** + * Safety caution to prevent the parser from using large amounts of memory + * in the case where parsing settings like file encodings don't end up + * matching the actual format of a file. This switch can be turned off if + * the file format is known and tested. 
With the switch off, the max column + * lengths and max column count per record supported by the parser will + * greatly increase. Default is true. + * + * @return The current setting of the safety switch. + */ + public boolean getSafetySwitch() { + return userSettings.SafetySwitch; + } + + /** + * Safety caution to prevent the parser from using large amounts of memory + * in the case where parsing settings like file encodings don't end up + * matching the actual format of a file. This switch can be turned off if + * the file format is known and tested. With the switch off, the max column + * lengths and max column count per record supported by the parser will + * greatly increase. Default is true. + * + * @param safetySwitch + */ + public void setSafetySwitch(boolean safetySwitch) { + userSettings.SafetySwitch = safetySwitch; + } + + /** + * Gets the count of columns found in this record. + * + * @return The count of columns found in this record. + */ + public int getColumnCount() { + return columnsCount; + } + + /** + * Gets the index of the current record. + * + * @return The index of the current record. + */ + public long getCurrentRecord() { + return currentRecord - 1; + } + + /** + * Gets the count of headers read in by a previous call to + * {@link org.jumpmind.symmetric.csv.CsvReader#readHeaders readHeaders()}. + * + * @return The count of headers read in by a previous call to + * {@link org.jumpmind.symmetric.csv.CsvReader#readHeaders readHeaders()}. + */ + public int getHeaderCount() { + return headersHolder.Length; + } + + /** + * Returns the header values as a string array. + * + * @return The header values as a String array. + * @exception IOException + * Thrown if this object has already been closed. 
+ */ + public String[] getHeaders() throws IOException { + checkClosed(); + + if (headersHolder.Headers == null) { + return null; + } else { + // use clone here to prevent the outside code from + // setting values on the array directly, which would + // throw off the index lookup based on header name + String[] clone = new String[headersHolder.Length]; + System.arraycopy(headersHolder.Headers, 0, clone, 0, headersHolder.Length); + return clone; + } + } + + public void setHeaders(String[] headers) { + headersHolder.Headers = headers; + + headersHolder.IndexByName.clear(); + + if (headers != null) { + headersHolder.Length = headers.length; + } else { + headersHolder.Length = 0; + } + + // use headersHolder.Length here in case headers is null + for (int i = 0; i < headersHolder.Length; i++) { + headersHolder.IndexByName.put(headers[i], new Integer(i)); + } + } + + public String[] getValues() throws IOException { + checkClosed(); + + // need to return a clone, and can't use clone because values.Length + // might be greater than columnsCount + String[] clone = new String[columnsCount]; + System.arraycopy(values, 0, clone, 0, columnsCount); + return clone; + } + + /** + * Returns the current column value for a given column index. + * + * @param columnIndex + * The index of the column. + * @return The current column value. + * @exception IOException + * Thrown if this object has already been closed. + */ + public String get(int columnIndex) throws IOException { + checkClosed(); + + if (columnIndex > -1 && columnIndex < columnsCount) { + return values[columnIndex]; + } else { + return ""; + } + } + + /** + * Returns the current column value for a given column header name. + * + * @param headerName + * The header name of the column. + * @return The current column value. + * @exception IOException + * Thrown if this object has already been closed. 
+ */ + public String get(String headerName) throws IOException { + checkClosed(); + + return get(getIndex(headerName)); + } + + /** + * Creates a {@link org.jumpmind.symmetric.csv.CsvReader CsvReader} object using a string + * of data as the source. Uses ISO-8859-1 as the + * {@link java.nio.charset.Charset Charset}. + * + * @param data + * The String of data to use as the source. + * @return A {@link org.jumpmind.symmetric.csv.CsvReader CsvReader} object using the + * String of data as the source. + */ + public static CsvReader parse(String data) { + if (data == null) { + throw new IllegalArgumentException("Parameter data can not be null."); + } + + return new CsvReader(new StringReader(data)); + } + + /** + * Reads another record. + * + * @return Whether another record was successfully read or not. + * @exception IOException + * Thrown if an error occurs while reading data from the + * source stream. + */ + public boolean readRecord() throws IOException { + checkClosed(); + + columnsCount = 0; + rawBuffer.Position = 0; + + dataBuffer.LineStart = dataBuffer.Position; + + hasReadNextLine = false; + + // check to see if we've already found the end of data + + if (hasMoreData) { + // loop over the data stream until the end of data is found + // or the end of the record is found + + do { + if (dataBuffer.Position == dataBuffer.Count) { + checkDataLength(); + } else { + startedWithQualifier = false; + + // grab the current letter as a char + + char currentLetter = dataBuffer.Buffer[dataBuffer.Position]; + + if (userSettings.UseTextQualifier && currentLetter == userSettings.TextQualifier) { + // this will be a text qualified column, so + // we need to set startedWithQualifier to make it + // enter the seperate branch to handle text + // qualified columns + + lastLetter = currentLetter; + + // read qualified + startedColumn = true; + dataBuffer.ColumnStart = dataBuffer.Position + 1; + startedWithQualifier = true; + boolean lastLetterWasQualifier = false; + + char 
escapeChar = userSettings.TextQualifier; + + if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) { + escapeChar = Letters.BACKSLASH; + } + + boolean eatingTrailingJunk = false; + boolean lastLetterWasEscape = false; + boolean readingComplexEscape = false; + int escape = ComplexEscape.UNICODE; + int escapeLength = 0; + char escapeValue = (char) 0; + + dataBuffer.Position++; + + do { + if (dataBuffer.Position == dataBuffer.Count) { + checkDataLength(); + } else { + // grab the current letter as a char + + currentLetter = dataBuffer.Buffer[dataBuffer.Position]; + + if (eatingTrailingJunk) { + dataBuffer.ColumnStart = dataBuffer.Position + 1; + + if (currentLetter == userSettings.Delimiter) { + endColumn(); + } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF)) + || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) { + endColumn(); + + endRecord(); + } + } else if (readingComplexEscape) { + escapeLength++; + + switch (escape) { + case ComplexEscape.UNICODE: + escapeValue *= (char) 16; + escapeValue += hexToDec(currentLetter); + + if (escapeLength == 4) { + readingComplexEscape = false; + } + + break; + case ComplexEscape.OCTAL: + escapeValue *= (char) 8; + escapeValue += (char) (currentLetter - '0'); + + if (escapeLength == 3) { + readingComplexEscape = false; + } + + break; + case ComplexEscape.DECIMAL: + escapeValue *= (char) 10; + escapeValue += (char) (currentLetter - '0'); + + if (escapeLength == 3) { + readingComplexEscape = false; + } + + break; + case ComplexEscape.HEX: + escapeValue *= (char) 16; + escapeValue += hexToDec(currentLetter); + + if (escapeLength == 2) { + readingComplexEscape = false; + } + + break; + } + + if (!readingComplexEscape) { + appendLetter(escapeValue); + } else { + dataBuffer.ColumnStart = dataBuffer.Position + 1; + } + } else if (currentLetter == userSettings.TextQualifier) { + if (lastLetterWasEscape) { + lastLetterWasEscape = false; + 
lastLetterWasQualifier = false; + } else { + updateCurrentValue(); + + if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) { + lastLetterWasEscape = true; + } + + lastLetterWasQualifier = true; + } + } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH && lastLetterWasEscape) { + switch (currentLetter) { + case 'n': + appendLetter(Letters.LF); + break; + case 'r': + appendLetter(Letters.CR); + break; + case 't': + appendLetter(Letters.TAB); + break; + case 'b': + appendLetter(Letters.BACKSPACE); + break; + case 'f': + appendLetter(Letters.FORM_FEED); + break; + case 'e': + appendLetter(Letters.ESCAPE); + break; + case 'v': + appendLetter(Letters.VERTICAL_TAB); + break; + case 'a': + appendLetter(Letters.ALERT); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + escape = ComplexEscape.OCTAL; + readingComplexEscape = true; + escapeLength = 1; + escapeValue = (char) (currentLetter - '0'); + dataBuffer.ColumnStart = dataBuffer.Position + 1; + break; + case 'u': + case 'x': + case 'o': + case 'd': + case 'U': + case 'X': + case 'O': + case 'D': + switch (currentLetter) { + case 'u': + case 'U': + escape = ComplexEscape.UNICODE; + break; + case 'x': + case 'X': + escape = ComplexEscape.HEX; + break; + case 'o': + case 'O': + escape = ComplexEscape.OCTAL; + break; + case 'd': + case 'D': + escape = ComplexEscape.DECIMAL; + break; + } + + readingComplexEscape = true; + escapeLength = 0; + escapeValue = (char) 0; + dataBuffer.ColumnStart = dataBuffer.Position + 1; + + break; + default: + break; + } + + lastLetterWasEscape = false; + + // can only happen for ESCAPE_MODE_BACKSLASH + } else if (currentLetter == escapeChar) { + updateCurrentValue(); + lastLetterWasEscape = true; + } else { + if (lastLetterWasQualifier) { + if (currentLetter == userSettings.Delimiter) { + endColumn(); + } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF)) + || 
(useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) { + endColumn(); + + endRecord(); + } else { + dataBuffer.ColumnStart = dataBuffer.Position + 1; + + eatingTrailingJunk = true; + } + + // make sure to clear the flag for next + // run of the loop + + lastLetterWasQualifier = false; + } + } + + // keep track of the last letter because we need + // it for several key decisions + + lastLetter = currentLetter; + + if (startedColumn) { + dataBuffer.Position++; + + if (userSettings.SafetySwitch + && dataBuffer.Position - dataBuffer.ColumnStart + columnBuffer.Position > 100000) { + close(); + + throw new IOException( + "Maximum column length of 100,000 exceeded in column " + + NumberFormat.getIntegerInstance().format(columnsCount) + + " in record " + + NumberFormat.getIntegerInstance().format(currentRecord) + + ". Set the SafetySwitch property to false" + + " if you're expecting column lengths greater than 100,000 characters to" + + " avoid this error."); + } + } + } // end else + + } while (hasMoreData && startedColumn); + } else if (currentLetter == userSettings.Delimiter) { + // we encountered a column with no data, so + // just send the end column + + lastLetter = currentLetter; + + endColumn(); + } else if (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter) { + // this will skip blank lines + if (startedColumn || columnsCount > 0 || !userSettings.SkipEmptyRecords) { + endColumn(); + + endRecord(); + } else { + dataBuffer.LineStart = dataBuffer.Position + 1; + } + + lastLetter = currentLetter; + } else if (!useCustomRecordDelimiter + && (currentLetter == Letters.CR || currentLetter == Letters.LF)) { + // this will skip blank lines + if (startedColumn + || columnsCount > 0 + || (!userSettings.SkipEmptyRecords && (currentLetter == Letters.CR || lastLetter != Letters.CR))) { + endColumn(); + + endRecord(); + } else { + dataBuffer.LineStart = dataBuffer.Position + 1; + } + + lastLetter = currentLetter; + } else if 
(userSettings.UseComments && columnsCount == 0 && currentLetter == userSettings.Comment) { + // encountered a comment character at the beginning of + // the line so just ignore the rest of the line + + lastLetter = currentLetter; + + skipLine(); + } else if (userSettings.TrimWhitespace + && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) { + // do nothing, this will trim leading whitespace + // for both text qualified columns and non + + startedColumn = true; + dataBuffer.ColumnStart = dataBuffer.Position + 1; + } else { + // since the letter wasn't a special letter, this + // will be the first letter of our current column + + startedColumn = true; + dataBuffer.ColumnStart = dataBuffer.Position; + boolean lastLetterWasBackslash = false; + boolean readingComplexEscape = false; + int escape = ComplexEscape.UNICODE; + int escapeLength = 0; + char escapeValue = (char) 0; + + boolean firstLoop = true; + + do { + if (!firstLoop && dataBuffer.Position == dataBuffer.Count) { + checkDataLength(); + } else { + if (!firstLoop) { + // grab the current letter as a char + currentLetter = dataBuffer.Buffer[dataBuffer.Position]; + } + + if (!userSettings.UseTextQualifier && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH + && currentLetter == Letters.BACKSLASH) { + if (lastLetterWasBackslash) { + lastLetterWasBackslash = false; + } else { + updateCurrentValue(); + lastLetterWasBackslash = true; + } + } else if (readingComplexEscape) { + escapeLength++; + + switch (escape) { + case ComplexEscape.UNICODE: + escapeValue *= (char) 16; + escapeValue += hexToDec(currentLetter); + + if (escapeLength == 4) { + readingComplexEscape = false; + } + + break; + case ComplexEscape.OCTAL: + escapeValue *= (char) 8; + escapeValue += (char) (currentLetter - '0'); + + if (escapeLength == 3) { + readingComplexEscape = false; + } + + break; + case ComplexEscape.DECIMAL: + escapeValue *= (char) 10; + escapeValue += (char) (currentLetter - '0'); + + if (escapeLength == 3) { + 
readingComplexEscape = false; + } + + break; + case ComplexEscape.HEX: + escapeValue *= (char) 16; + escapeValue += hexToDec(currentLetter); + + if (escapeLength == 2) { + readingComplexEscape = false; + } + + break; + } + + if (!readingComplexEscape) { + appendLetter(escapeValue); + } else { + dataBuffer.ColumnStart = dataBuffer.Position + 1; + } + } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH && lastLetterWasBackslash) { + switch (currentLetter) { + case 'n': + appendLetter(Letters.LF); + break; + case 'r': + appendLetter(Letters.CR); + break; + case 't': + appendLetter(Letters.TAB); + break; + case 'b': + appendLetter(Letters.BACKSPACE); + break; + case 'f': + appendLetter(Letters.FORM_FEED); + break; + case 'e': + appendLetter(Letters.ESCAPE); + break; + case 'v': + appendLetter(Letters.VERTICAL_TAB); + break; + case 'a': + appendLetter(Letters.ALERT); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + escape = ComplexEscape.OCTAL; + readingComplexEscape = true; + escapeLength = 1; + escapeValue = (char) (currentLetter - '0'); + dataBuffer.ColumnStart = dataBuffer.Position + 1; + break; + case 'u': + case 'x': + case 'o': + case 'd': + case 'U': + case 'X': + case 'O': + case 'D': + switch (currentLetter) { + case 'u': + case 'U': + escape = ComplexEscape.UNICODE; + break; + case 'x': + case 'X': + escape = ComplexEscape.HEX; + break; + case 'o': + case 'O': + escape = ComplexEscape.OCTAL; + break; + case 'd': + case 'D': + escape = ComplexEscape.DECIMAL; + break; + } + + readingComplexEscape = true; + escapeLength = 0; + escapeValue = (char) 0; + dataBuffer.ColumnStart = dataBuffer.Position + 1; + + break; + default: + break; + } + + lastLetterWasBackslash = false; + } else { + if (currentLetter == userSettings.Delimiter) { + endColumn(); + } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF)) + || (useCustomRecordDelimiter && currentLetter == 
userSettings.RecordDelimiter)) { + endColumn(); + + endRecord(); + } + } + + // keep track of the last letter because we need + // it for several key decisions + + lastLetter = currentLetter; + firstLoop = false; + + if (startedColumn) { + dataBuffer.Position++; + + if (userSettings.SafetySwitch + && dataBuffer.Position - dataBuffer.ColumnStart + columnBuffer.Position > 100000) { + close(); + + throw new IOException( + "Maximum column length of 100,000 exceeded in column " + + NumberFormat.getIntegerInstance().format(columnsCount) + + " in record " + + NumberFormat.getIntegerInstance().format(currentRecord) + + ". Set the SafetySwitch property to false" + + " if you're expecting column lengths greater than 100,000 characters to" + + " avoid this error."); + } + } + } // end else + } while (hasMoreData && startedColumn); + } + + if (hasMoreData) { + dataBuffer.Position++; + } + } // end else + } while (hasMoreData && !hasReadNextLine); + + // check to see if we hit the end of the file + // without processing the current record + + if (startedColumn || lastLetter == userSettings.Delimiter) { + endColumn(); + + endRecord(); + } + } + + if (userSettings.CaptureRawRecord) { + if (hasMoreData) { + if (rawBuffer.Position == 0) { + rawRecord = new String(dataBuffer.Buffer, dataBuffer.LineStart, dataBuffer.Position + - dataBuffer.LineStart - 1); + } else { + rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position) + + new String(dataBuffer.Buffer, dataBuffer.LineStart, dataBuffer.Position + - dataBuffer.LineStart - 1); + } + } else { + // for hasMoreData to ever be false, all data would have had to + // have been + // copied to the raw buffer + rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position); + } + } else { + rawRecord = ""; + } + + return hasReadNextLine; + } + + /** + * @exception IOException + * Thrown if an error occurs while reading data from the + * source stream. 
+ */ + private void checkDataLength() throws IOException { + if (!initialized) { + if (fileName != null) { + inputStream = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charset), + StaticSettings.MAX_FILE_BUFFER_SIZE); + } + + charset = null; + initialized = true; + } + + updateCurrentValue(); + + if (userSettings.CaptureRawRecord && dataBuffer.Count > 0) { + if (rawBuffer.Buffer.length - rawBuffer.Position < dataBuffer.Count - dataBuffer.LineStart) { + int newLength = rawBuffer.Buffer.length + + Math.max(dataBuffer.Count - dataBuffer.LineStart, rawBuffer.Buffer.length); + + char[] holder = new char[newLength]; + + System.arraycopy(rawBuffer.Buffer, 0, holder, 0, rawBuffer.Position); + + rawBuffer.Buffer = holder; + } + + System.arraycopy(dataBuffer.Buffer, dataBuffer.LineStart, rawBuffer.Buffer, rawBuffer.Position, + dataBuffer.Count - dataBuffer.LineStart); + + rawBuffer.Position += dataBuffer.Count - dataBuffer.LineStart; + } + + try { + dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0, dataBuffer.Buffer.length); + } catch (IOException ex) { + close(); + + throw ex; + } + + // if no more data could be found, set flag stating that + // the end of the data was found + + if (dataBuffer.Count == -1) { + hasMoreData = false; + } + + dataBuffer.Position = 0; + dataBuffer.LineStart = 0; + dataBuffer.ColumnStart = 0; + } + + /** + * Read the first record of data as column headers. + * + * @return Whether the header record was successfully read or not. + * @exception IOException + * Thrown if an error occurs while reading data from the + * source stream. 
+ */ + public boolean readHeaders() throws IOException { + boolean result = readRecord(); + + // copy the header data from the column array + // to the header string array + + headersHolder.Length = columnsCount; + + headersHolder.Headers = new String[columnsCount]; + + for (int i = 0; i < headersHolder.Length; i++) { + String columnValue = get(i); + + headersHolder.Headers[i] = columnValue; + + // if there are duplicate header names, we will save the last one + headersHolder.IndexByName.put(columnValue, new Integer(i)); + } + + if (result) { + currentRecord--; + } + + columnsCount = 0; + + return result; + } + + /** + * Returns the column header value for a given column index. + * + * @param columnIndex + * The index of the header column being requested. + * @return The value of the column header at the given column index. + * @exception IOException + * Thrown if this object has already been closed. + */ + public String getHeader(int columnIndex) throws IOException { + checkClosed(); + + // check to see if we have read the header record yet + + // check to see if the column index is within the bounds + // of our header array + + if (columnIndex > -1 && columnIndex < headersHolder.Length) { + // return the processed header data for this column + + return headersHolder.Headers[columnIndex]; + } else { + return ""; + } + } + + public boolean isQualified(int columnIndex) throws IOException { + checkClosed(); + + if (columnIndex < columnsCount && columnIndex > -1) { + return isQualified[columnIndex]; + } else { + return false; + } + } + + /** + * @exception IOException + * Thrown if a very rare extreme exception occurs during + * parsing, normally resulting from improper data format. 
+ */ + private void endColumn() throws IOException { + String currentValue = ""; + + // must be called before setting startedColumn = false + if (startedColumn) { + if (columnBuffer.Position == 0) { + if (dataBuffer.ColumnStart < dataBuffer.Position) { + int lastLetter = dataBuffer.Position - 1; + + if (userSettings.TrimWhitespace && !startedWithQualifier) { + while (lastLetter >= dataBuffer.ColumnStart + && (dataBuffer.Buffer[lastLetter] == Letters.SPACE || dataBuffer.Buffer[lastLetter] == Letters.TAB)) { + lastLetter--; + } + } + + currentValue = new String(dataBuffer.Buffer, dataBuffer.ColumnStart, lastLetter + - dataBuffer.ColumnStart + 1); + } + } else { + updateCurrentValue(); + + int lastLetter = columnBuffer.Position - 1; + + if (userSettings.TrimWhitespace && !startedWithQualifier) { + while (lastLetter >= 0 + && (columnBuffer.Buffer[lastLetter] == Letters.SPACE || columnBuffer.Buffer[lastLetter] == Letters.SPACE)) { + lastLetter--; + } + } + + currentValue = new String(columnBuffer.Buffer, 0, lastLetter + 1); + } + } + + columnBuffer.Position = 0; + + startedColumn = false; + + if (columnsCount >= 100000 && userSettings.SafetySwitch) { + close(); + + throw new IOException("Maximum column count of 100,000 exceeded in record " + + NumberFormat.getIntegerInstance().format(currentRecord) + + ". 
Set the SafetySwitch property to false" + + " if you're expecting more than 100,000 columns per record to" + " avoid this error."); + } + + // check to see if our current holder array for + // column chunks is still big enough to handle another + // column chunk + + if (columnsCount == values.length) { + // holder array needs to grow to be able to hold another column + int newLength = values.length * 2; + + String[] holder = new String[newLength]; + + System.arraycopy(values, 0, holder, 0, values.length); + + values = holder; + + boolean[] qualifiedHolder = new boolean[newLength]; + + System.arraycopy(isQualified, 0, qualifiedHolder, 0, isQualified.length); + + isQualified = qualifiedHolder; + } + + // BEGIN + // An unquoted empty element string means null + if (!startedWithQualifier && currentValue.equals("")) { + currentValue = null; + } + // END + + values[columnsCount] = currentValue; + + isQualified[columnsCount] = startedWithQualifier; + + currentValue = ""; + startedWithQualifier=false; + columnsCount++; + } + + private void appendLetter(char letter) { + if (columnBuffer.Position == columnBuffer.Buffer.length) { + int newLength = columnBuffer.Buffer.length * 2; + + char[] holder = new char[newLength]; + + System.arraycopy(columnBuffer.Buffer, 0, holder, 0, columnBuffer.Position); + + columnBuffer.Buffer = holder; + } + columnBuffer.Buffer[columnBuffer.Position++] = letter; + dataBuffer.ColumnStart = dataBuffer.Position + 1; + } + + private void updateCurrentValue() { + if (startedColumn && dataBuffer.ColumnStart < dataBuffer.Position) { + if (columnBuffer.Buffer.length - columnBuffer.Position < dataBuffer.Position - dataBuffer.ColumnStart) { + int newLength = columnBuffer.Buffer.length + + Math.max(dataBuffer.Position - dataBuffer.ColumnStart, columnBuffer.Buffer.length); + + char[] holder = new char[newLength]; + + System.arraycopy(columnBuffer.Buffer, 0, holder, 0, columnBuffer.Position); + + columnBuffer.Buffer = holder; + } + + 
System.arraycopy(dataBuffer.Buffer, dataBuffer.ColumnStart, columnBuffer.Buffer, columnBuffer.Position, + dataBuffer.Position - dataBuffer.ColumnStart); + + columnBuffer.Position += dataBuffer.Position - dataBuffer.ColumnStart; + } + + dataBuffer.ColumnStart = dataBuffer.Position + 1; + } + + /** + * @exception IOException + * Thrown if an error occurs while reading data from the + * source stream. + */ + private void endRecord() throws IOException { + // this flag is used as a loop exit condition + // during parsing + + hasReadNextLine = true; + + currentRecord++; + } + + /** + * Gets the corresponding column index for a given column header name. + * + * @param headerName + * The header name of the column. + * @return The column index for the given column header name. Returns + * -1 if not found. + * @exception IOException + * Thrown if this object has already been closed. + */ + public int getIndex(String headerName) throws IOException { + checkClosed(); + + Object indexValue = headersHolder.IndexByName.get(headerName); + + if (indexValue != null) { + return ((Integer) indexValue).intValue(); + } else { + return -1; + } + } + + /** + * Skips the next record of data by parsing each column. Does not + * increment + * {@link org.jumpmind.symmetric.csv.CsvReader#getCurrentRecord getCurrentRecord()}. + * + * @return Whether another record was successfully skipped or not. + * @exception IOException + * Thrown if an error occurs while reading data from the + * source stream. + */ + public boolean skipRecord() throws IOException { + checkClosed(); + + boolean recordRead = false; + + if (hasMoreData) { + recordRead = readRecord(); + + if (recordRead) { + currentRecord--; + } + } + + return recordRead; + } + + /** + * Skips the next line of data using the standard end of line characters and + * does not do any column delimited parsing. + * + * @return Whether a line was successfully skipped or not. 
+ * @exception IOException + * Thrown if an error occurs while reading data from the + * source stream. + */ + public boolean skipLine() throws IOException { + checkClosed(); + + // clear public column values for current line + + columnsCount = 0; + + boolean skippedLine = false; + + if (hasMoreData) { + boolean foundEol = false; + + do { + if (dataBuffer.Position == dataBuffer.Count) { + checkDataLength(); + } else { + skippedLine = true; + + // grab the current letter as a char + + char currentLetter = dataBuffer.Buffer[dataBuffer.Position]; + + if (currentLetter == Letters.CR || currentLetter == Letters.LF) { + foundEol = true; + } + + // keep track of the last letter because we need + // it for several key decisions + + lastLetter = currentLetter; + + if (!foundEol) { + dataBuffer.Position++; + } + + } // end else + } while (hasMoreData && !foundEol); + + columnBuffer.Position = 0; + + dataBuffer.LineStart = dataBuffer.Position + 1; + } + + rawBuffer.Position = 0; + rawRecord = ""; + + return skippedLine; + } + + /** + * Closes and releases all related resources. + */ + public void close() { + if (!closed) { + close(true); + + closed = true; + } + } + + /** + * + */ + private void close(boolean closing) { + if (!closed) { + if (closing) { + charset = null; + headersHolder.Headers = null; + headersHolder.IndexByName = null; + dataBuffer.Buffer = null; + columnBuffer.Buffer = null; + rawBuffer.Buffer = null; + } + + try { + if (initialized) { + inputStream.close(); + } + } catch (Exception e) { + // just eat the exception + } + + inputStream = null; + + closed = true; + } + } + + /** + * @exception IOException + * Thrown if this object has already been closed. 
+ */ + private void checkClosed() throws IOException { + if (closed) { + throw new IOException("This instance of the CsvReader class has already been closed."); + } + } + + @Override + protected void finalize() { + close(false); + } + + private class ComplexEscape { + private static final int UNICODE = 1; + + private static final int OCTAL = 2; + + private static final int DECIMAL = 3; + + private static final int HEX = 4; + } + + private static char hexToDec(char hex) { + char result; + + if (hex >= 'a') { + result = (char) (hex - 'a' + 10); + } else if (hex >= 'A') { + result = (char) (hex - 'A' + 10); + } else { + result = (char) (hex - '0'); + } + + return result; + } + + private class DataBuffer { + public char[] Buffer; + + public int Position; + + // / + // / How much usable data has been read into the stream, + // / which will not always be as long as Buffer.Length. + // / + public int Count; + + // / + // / The position of the cursor in the buffer when the + // / current column was started or the last time data + // / was moved out to the column buffer. 
+ // / + public int ColumnStart; + + public int LineStart; + + public DataBuffer() { + Buffer = new char[StaticSettings.MAX_BUFFER_SIZE]; + Position = 0; + Count = 0; + ColumnStart = 0; + LineStart = 0; + } + } + + private class ColumnBuffer { + public char[] Buffer; + + public int Position; + + public ColumnBuffer() { + Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE]; + Position = 0; + } + } + + private class RawRecordBuffer { + public char[] Buffer; + + public int Position; + + public RawRecordBuffer() { + Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE * StaticSettings.INITIAL_COLUMN_COUNT]; + Position = 0; + } + } + + private class Letters { + public static final char LF = '\n'; + + public static final char CR = '\r'; + + public static final char QUOTE = '"'; + + public static final char COMMA = ','; + + public static final char SPACE = ' '; + + public static final char TAB = '\t'; + + public static final char POUND = '#'; + + public static final char BACKSLASH = '\\'; + + public static final char NULL = '\0'; + + public static final char BACKSPACE = '\b'; + + public static final char FORM_FEED = '\f'; + + public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape + + public static final char VERTICAL_TAB = '\u000B'; + + public static final char ALERT = '\u0007'; + } + + private class UserSettings { + // having these as publicly accessible members will prevent + // the overhead of the method call that exists on properties + + public char TextQualifier; + + public boolean TrimWhitespace; + + public boolean UseTextQualifier; + + public char Delimiter; + + public char RecordDelimiter; + + public char Comment; + + public boolean UseComments; + + public int EscapeMode; + + public boolean SafetySwitch; + + public boolean SkipEmptyRecords; + + public boolean CaptureRawRecord; + + public UserSettings() { + TextQualifier = Letters.QUOTE; + TrimWhitespace = true; + UseTextQualifier = true; + Delimiter = Letters.COMMA; + RecordDelimiter = 
Letters.NULL; + Comment = Letters.POUND; + UseComments = false; + EscapeMode = CsvReader.ESCAPE_MODE_DOUBLED; + SafetySwitch = true; + SkipEmptyRecords = true; + CaptureRawRecord = true; + } + } + + private class HeadersHolder { + public String[] Headers; + + public int Length; + + public HashMap IndexByName; + + public HeadersHolder() { + Headers = null; + Length = 0; + IndexByName = new HashMap(); + } + } + + private class StaticSettings { + // these are static instead of final so they can be changed in unit test + // isn't visible outside this class and is only accessed once during + // CsvReader construction + public static final int MAX_BUFFER_SIZE = 1024; + + public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024; + + public static final int INITIAL_COLUMN_COUNT = 10; + + public static final int INITIAL_COLUMN_BUFFER_SIZE = 50; + } +} \ No newline at end of file diff --git a/future/symmetric3-csv/src/main/java/org/jumpmind/symmetric/csv/CsvWriter.java b/future/symmetric3-csv/src/main/java/org/jumpmind/symmetric/csv/CsvWriter.java new file mode 100644 index 0000000000..104593d39b --- /dev/null +++ b/future/symmetric3-csv/src/main/java/org/jumpmind/symmetric/csv/CsvWriter.java @@ -0,0 +1,595 @@ +/* + * Java CSV is a stream based library for reading and writing + * CSV and other delimited data. + * + * Copyright (C) Bruce Dunwiddie bruce@csvreader.com + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
/*
 * Java CSV is a stream based library for reading and writing
 * CSV and other delimited data.
 *
 * Copyright (C) Bruce Dunwiddie bruce@csvreader.com
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.nio.charset.Charset;

/**
 * A stream based writer for writing delimited text data to a file or a stream.
 *
 * A {@code null} value is written as an empty unquoted column and an empty
 * string as an empty quoted column, so the two stay distinguishable in the
 * output.
 */
public class CsvWriter {
    private PrintWriter outputStream = null;

    private String fileName = null;

    private boolean firstColumn = true;

    private boolean useCustomRecordDelimiter = false;

    private Charset charset = null;

    // this holds all the values for switches that the user is allowed to set
    private UserSettings userSettings = new UserSettings();

    private boolean initialized = false;

    private boolean closed = false;

    /**
     * Double up the text qualifier to represent an occurrence of the text
     * qualifier.
     */
    public static final int ESCAPE_MODE_DOUBLED = 1;

    /**
     * Use a backslash character before the text qualifier to represent an
     * occurrence of the text qualifier.
     */
    public static final int ESCAPE_MODE_BACKSLASH = 2;

    /**
     * Creates a {@link CsvWriter CsvWriter} object using a file as the data
     * destination. The file is opened on the first write, not here.
     *
     * @param fileName
     *            The path to the file to output the data.
     * @param delimiter
     *            The character to use as the column delimiter.
     * @param charset
     *            The {@link java.nio.charset.Charset Charset} to use while
     *            writing the data.
     * @throws IllegalArgumentException
     *             If fileName or charset is null.
     */
    public CsvWriter(String fileName, char delimiter, Charset charset) {
        if (fileName == null) {
            throw new IllegalArgumentException("Parameter fileName can not be null.");
        }

        if (charset == null) {
            throw new IllegalArgumentException("Parameter charset can not be null.");
        }

        this.fileName = fileName;
        userSettings.Delimiter = delimiter;
        this.charset = charset;
    }

    /**
     * Creates a {@link CsvWriter CsvWriter} object using a file as the data
     * destination. Uses a comma as the column delimiter and ISO-8859-1 as the
     * {@link java.nio.charset.Charset Charset}.
     *
     * @param fileName
     *            The path to the file to output the data.
     */
    public CsvWriter(String fileName) {
        this(fileName, Letters.COMMA, Charset.forName("ISO-8859-1"));
    }

    /**
     * Creates a {@link CsvWriter CsvWriter} object using a Writer to write
     * data to.
     *
     * @param outputStream
     *            The stream to write the column delimited data to.
     * @param delimiter
     *            The character to use as the column delimiter.
     * @throws IllegalArgumentException
     *             If outputStream is null.
     */
    public CsvWriter(Writer outputStream, char delimiter) {
        if (outputStream == null) {
            throw new IllegalArgumentException("Parameter outputStream can not be null.");
        }

        this.outputStream = new PrintWriter(outputStream);
        userSettings.Delimiter = delimiter;
        initialized = true;
    }

    /**
     * Creates a {@link CsvWriter CsvWriter} object using an OutputStream to
     * write data to.
     *
     * @param outputStream
     *            The stream to write the column delimited data to.
     * @param delimiter
     *            The character to use as the column delimiter.
     * @param charset
     *            The {@link java.nio.charset.Charset Charset} to use while
     *            writing the data.
     */
    public CsvWriter(OutputStream outputStream, char delimiter, Charset charset) {
        this(new OutputStreamWriter(outputStream, charset), delimiter);
    }

    /**
     * Gets the character being used as the column delimiter.
     *
     * @return The character being used as the column delimiter.
     */
    public char getDelimiter() {
        return userSettings.Delimiter;
    }

    /**
     * Sets the character to use as the column delimiter.
     *
     * @param delimiter
     *            The character to use as the column delimiter.
     */
    public void setDelimiter(char delimiter) {
        userSettings.Delimiter = delimiter;
    }

    /**
     * Gets the character stored as the custom record delimiter.
     *
     * @return The record delimiter character; it is only used for output
     *         once {@link #setRecordDelimiter(char)} has been called.
     */
    public char getRecordDelimiter() {
        return userSettings.RecordDelimiter;
    }

    /**
     * Sets the character to use as the record delimiter.
     *
     * @param recordDelimiter
     *            The character to use as the record delimiter. Default is
     *            combination of standard end of line characters for
     *            Windows, Unix, or Mac.
     */
    public void setRecordDelimiter(char recordDelimiter) {
        useCustomRecordDelimiter = true;
        userSettings.RecordDelimiter = recordDelimiter;
    }

    /**
     * Gets the character to use as a text qualifier in the data.
     *
     * @return The character to use as a text qualifier in the data.
     */
    public char getTextQualifier() {
        return userSettings.TextQualifier;
    }

    /**
     * Sets the character to use as a text qualifier in the data.
     *
     * @param textQualifier
     *            The character to use as a text qualifier in the data.
     */
    public void setTextQualifier(char textQualifier) {
        userSettings.TextQualifier = textQualifier;
    }

    /**
     * Whether text qualifiers will be used while writing data or not.
     *
     * @return Whether text qualifiers will be used while writing data or not.
     */
    public boolean getUseTextQualifier() {
        return userSettings.UseTextQualifier;
    }

    /**
     * Sets whether text qualifiers will be used while writing data or not.
     *
     * @param useTextQualifier
     *            Whether to use a text qualifier while writing data or not.
     */
    public void setUseTextQualifier(boolean useTextQualifier) {
        userSettings.UseTextQualifier = useTextQualifier;
    }

    /**
     * Gets the escape mode, one of {@link #ESCAPE_MODE_DOUBLED} or
     * {@link #ESCAPE_MODE_BACKSLASH}.
     *
     * @return The escape mode currently in use.
     */
    public int getEscapeMode() {
        return userSettings.EscapeMode;
    }

    /**
     * Sets the escape mode.
     *
     * @param escapeMode
     *            {@link #ESCAPE_MODE_DOUBLED} or
     *            {@link #ESCAPE_MODE_BACKSLASH}.
     */
    public void setEscapeMode(int escapeMode) {
        userSettings.EscapeMode = escapeMode;
    }

    /**
     * Sets the character that starts a comment line.
     *
     * @param comment
     *            The comment character.
     */
    public void setComment(char comment) {
        userSettings.Comment = comment;
    }

    /**
     * Gets the character that starts a comment line.
     *
     * @return The comment character.
     */
    public char getComment() {
        return userSettings.Comment;
    }

    /**
     * Whether fields will be surrounded by the text qualifier even if the
     * qualifier is not necessarily needed to escape this field.
     *
     * @return Whether fields will be forced to be qualified or not.
     */
    public boolean getForceQualifier() {
        return userSettings.ForceQualifier;
    }

    /**
     * Use this to force all fields to be surrounded by the text qualifier even
     * if the qualifier is not necessarily needed to escape this field. Default
     * is false.
     *
     * @param forceQualifier
     *            Whether to force the fields to be qualified or not.
     */
    public void setForceQualifier(boolean forceQualifier) {
        userSettings.ForceQualifier = forceQualifier;
    }

    /**
     * Writes another column of data to this record.
     *
     * @param content
     *            The data for the new column. {@code null} is written as an
     *            empty unquoted column, an empty string as an empty quoted
     *            column.
     * @param preserveSpaces
     *            Whether to preserve leading and trailing whitespace in
     *            this column of data.
     * @exception IOException
     *                Thrown if this writer has already been closed or the
     *                destination file cannot be opened.
     */
    public void write(String content, boolean preserveSpaces) throws IOException {
        checkClosed();

        checkInit();

        if (!firstColumn) {
            outputStream.write(userSettings.Delimiter);
        }

        boolean textQualify = userSettings.ForceQualifier;

        if (content == null) {
            // We want a null to be an empty unquoted element
            content = "";
            textQualify = false;
        } else if (content.equals("")) {
            // We want an empty string to be a quoted element
            textQualify = true;
        }

        if (!preserveSpaces && content.length() > 0) {
            content = content.trim();
        }

        if (!textQualify && userSettings.UseTextQualifier) {
            textQualify = containsSpecialCharacter(content)
                    || (preserveSpaces && content.length() > 0 && hasEdgeWhitespace(content));
        }

        if (textQualify) {
            outputStream.write(userSettings.TextQualifier);

            content = escapeQualified(content);
        } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
            content = escapeUnqualified(content);
        }

        outputStream.write(content);

        if (textQualify) {
            outputStream.write(userSettings.TextQualifier);
        }

        firstColumn = false;
    }

    /**
     * Tells whether the content holds something that would be misread
     * without a text qualifier: the qualifier itself, the column delimiter,
     * a record delimiter, a leading comment character in the first column,
     * or nothing at all in the first column.
     */
    private boolean containsSpecialCharacter(String content) {
        if (content.indexOf(userSettings.TextQualifier) > -1 || content.indexOf(userSettings.Delimiter) > -1) {
            return true;
        }

        if (useCustomRecordDelimiter) {
            if (content.indexOf(userSettings.RecordDelimiter) > -1) {
                return true;
            }
        } else if (content.indexOf(Letters.LF) > -1 || content.indexOf(Letters.CR) > -1) {
            return true;
        }

        if (!firstColumn) {
            return false;
        }

        // an empty first column on its own line must be qualified
        // or the line will be skipped
        return content.length() == 0 || content.charAt(0) == userSettings.Comment;
    }

    /**
     * Tells whether non-empty content starts with a space or tab, or is
     * longer than one character and ends with one.
     */
    private static boolean hasEdgeWhitespace(String content) {
        char firstLetter = content.charAt(0);

        if (firstLetter == Letters.SPACE || firstLetter == Letters.TAB) {
            return true;
        }

        if (content.length() > 1) {
            char lastLetter = content.charAt(content.length() - 1);

            return lastLetter == Letters.SPACE || lastLetter == Letters.TAB;
        }

        return false;
    }

    /** Escapes the text qualifier inside a qualified column. */
    private String escapeQualified(String content) {
        String qualifier = "" + userSettings.TextQualifier;

        if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
            content = replace(content, "" + Letters.BACKSLASH, "" + Letters.BACKSLASH + Letters.BACKSLASH);

            return replace(content, qualifier, "" + Letters.BACKSLASH + userSettings.TextQualifier);
        }

        return replace(content, qualifier, qualifier + userSettings.TextQualifier);
    }

    /** Backslash-escapes delimiters inside an unqualified column. */
    private String escapeUnqualified(String content) {
        content = replace(content, "" + Letters.BACKSLASH, "" + Letters.BACKSLASH + Letters.BACKSLASH);
        content = replace(content, "" + userSettings.Delimiter, "" + Letters.BACKSLASH + userSettings.Delimiter);

        if (useCustomRecordDelimiter) {
            content = replace(content, "" + userSettings.RecordDelimiter, "" + Letters.BACKSLASH
                    + userSettings.RecordDelimiter);
        } else {
            content = replace(content, "" + Letters.CR, "" + Letters.BACKSLASH + Letters.CR);
            content = replace(content, "" + Letters.LF, "" + Letters.BACKSLASH + Letters.LF);
        }

        // a leading comment character would turn the record into a comment
        if (firstColumn && content.length() > 0 && content.charAt(0) == userSettings.Comment) {
            if (content.length() > 1) {
                content = "" + Letters.BACKSLASH + userSettings.Comment + content.substring(1);
            } else {
                content = "" + Letters.BACKSLASH + userSettings.Comment;
            }
        }

        return content;
    }

    /**
     * Writes another column of data to this record. Does not preserve
     * leading and trailing whitespace in this column of data.
     *
     * @param content
     *            The data for the new column.
     * @exception IOException
     *                Thrown if this writer has already been closed or the
     *                destination file cannot be opened.
     */
    public void write(String content) throws IOException {
        write(content, false);
    }

    /**
     * Writes a comment line: the comment character, the given text and the
     * record delimiter.
     *
     * @param commentText
     *            The text of the comment.
     * @exception IOException
     *                Thrown if this writer has already been closed or the
     *                destination file cannot be opened.
     */
    public void writeComment(String commentText) throws IOException {
        checkClosed();

        checkInit();

        outputStream.write(userSettings.Comment);

        outputStream.write(commentText);

        writeRecordDelimiter();

        firstColumn = true;
    }

    /**
     * Writes a new record using the passed in array of values. Nothing is
     * written when the array is null or empty.
     *
     * @param values
     *            Values to be written.
     *
     * @param preserveSpaces
     *            Whether to preserve leading and trailing spaces in
     *            columns while writing out to the record or not.
     *
     * @throws IOException
     *             Thrown if this writer has already been closed or the
     *             destination file cannot be opened.
     */
    public void writeRecord(String[] values, boolean preserveSpaces) throws IOException {
        if (values != null && values.length > 0) {
            for (int i = 0; i < values.length; i++) {
                write(values[i], preserveSpaces);
            }

            endRecord();
        }
    }

    /**
     * Writes a new record using the passed in array of values.
     *
     * @param values
     *            Values to be written.
     *
     * @throws IOException
     *             Thrown if this writer has already been closed or the
     *             destination file cannot be opened.
     */
    public void writeRecord(String[] values) throws IOException {
        writeRecord(values, false);
    }

    /**
     * Ends the current record by sending the record delimiter.
     *
     * @exception IOException
     *                Thrown if this writer has already been closed or the
     *                destination file cannot be opened.
     */
    public void endRecord() throws IOException {
        checkClosed();

        checkInit();

        writeRecordDelimiter();

        firstColumn = true;
    }

    /** Writes the custom record delimiter, or the platform line separator. */
    private void writeRecordDelimiter() {
        if (useCustomRecordDelimiter) {
            outputStream.write(userSettings.RecordDelimiter);
        } else {
            outputStream.println();
        }
    }

    /**
     * Opens the destination file on first use when this writer was created
     * from a file name.
     */
    private void checkInit() throws IOException {
        if (!initialized) {
            if (fileName != null) {
                outputStream = new PrintWriter(new OutputStreamWriter(new FileOutputStream(fileName), charset));
            }

            initialized = true;
        }
    }

    /**
     * Clears all buffers for the current writer and causes any buffered data to
     * be written to the underlying device. Does nothing when no stream is
     * open, i.e. before the first write of a file based writer or after
     * {@link #close()}.
     */
    public void flush() {
        if (outputStream != null) {
            outputStream.flush();
        }
    }

    /**
     * Closes and releases all related resources.
     */
    public void close() {
        if (!closed) {
            close(true);

            closed = true;
        }
    }

    /**
     * Closes the underlying stream if it was opened and marks this writer as
     * closed.
     *
     * @param closing
     *            {@code true} for an explicit close, which also drops the
     *            charset reference.
     */
    private void close(boolean closing) {
        if (!closed) {
            if (closing) {
                charset = null;
            }

            try {
                if (initialized) {
                    outputStream.close();
                }
            } catch (Exception e) {
                // just eat the exception
            }

            outputStream = null;

            closed = true;
        }
    }

    /**
     * Guards write operations against use after {@link #close()}.
     */
    private void checkClosed() throws IOException {
        if (closed) {
            throw new IOException("This instance of the CsvWriter class has already been closed.");
        }
    }

    @Override
    protected void finalize() {
        close(false);
    }

    /** Named character constants used by the writer. */
    private static final class Letters {
        public static final char LF = '\n';

        public static final char CR = '\r';

        public static final char QUOTE = '"';

        public static final char COMMA = ',';

        public static final char SPACE = ' ';

        public static final char TAB = '\t';

        public static final char POUND = '#';

        public static final char BACKSLASH = '\\';

        public static final char NULL = '\0';
    }

    /** All switches the user is allowed to set, with their defaults. */
    private static final class UserSettings {
        // having these as publicly accessible members will prevent
        // the overhead of the method call that exists on properties
        public char TextQualifier;

        public boolean UseTextQualifier;

        public char Delimiter;

        public char RecordDelimiter;

        public char Comment;

        public int EscapeMode;

        public boolean ForceQualifier;

        public UserSettings() {
            TextQualifier = Letters.QUOTE;
            UseTextQualifier = true;
            Delimiter = Letters.COMMA;
            RecordDelimiter = Letters.NULL;
            Comment = Letters.POUND;
            EscapeMode = ESCAPE_MODE_DOUBLED;
            ForceQualifier = false;
        }
    }

    /**
     * Replaces every occurrence of a literal pattern in a string.
     *
     * @param original
     *            The text to search.
     * @param pattern
     *            The literal text to look for. An empty pattern matches
     *            nothing and leaves the text unchanged.
     * @param replace
     *            The text to put in place of each occurrence.
     * @return The text with all occurrences replaced, or the original
     *         instance when the pattern does not occur.
     */
    public static String replace(String original, String pattern, String replace) {
        final int len = pattern.length();

        // indexOf("") matches at every position, so the scan would never advance
        if (len == 0) {
            return original;
        }

        int found = original.indexOf(pattern);

        if (found < 0) {
            return original;
        }

        StringBuilder sb = new StringBuilder(original.length() + replace.length());
        int start = 0;

        while (found != -1) {
            sb.append(original, start, found);
            sb.append(replace);
            start = found + len;
            found = original.indexOf(pattern, start);
        }

        sb.append(original, start, original.length());

        return sb.toString();
    }
}