Skip to content

Commit

Permalink
Merge pull request #349 from ExcelDataReader/develop
Browse files Browse the repository at this point in the history
3.4.1
  • Loading branch information
andersnm committed Aug 29, 2018
2 parents fa71412 + aa8a828 commit de60db2
Show file tree
Hide file tree
Showing 23 changed files with 251 additions and 142 deletions.
4 changes: 2 additions & 2 deletions src/ExcelDataReader.DataSet/ExcelDataReader.DataSet.csproj
Expand Up @@ -3,9 +3,9 @@
<PropertyGroup>
<Description>ExcelDataReader extension for reading Microsoft Excel files into System.Data.DataSet.</Description>
<AssemblyTitle>ExcelDataReader.DataSet</AssemblyTitle>
<VersionPrefix>3.4.0</VersionPrefix>
<VersionPrefix>3.4.1</VersionPrefix>
<Authors>ExcelDataReader developers</Authors>
<TargetFrameworks>net20;net45;netstandard2.0</TargetFrameworks>
<TargetFrameworks>net20;net35;netstandard2.0</TargetFrameworks>
<AssemblyName>ExcelDataReader.DataSet</AssemblyName>
<AssemblyOriginatorKeyFile>..\ExcelDataReader.snk</AssemblyOriginatorKeyFile>
<SignAssembly>true</SignAssembly>
Expand Down
4 changes: 4 additions & 0 deletions src/ExcelDataReader/Core/BinaryFormat/XlsWorksheet.cs
Expand Up @@ -524,6 +524,10 @@ private void ReadWorksheetGlobals()

recordOffset = biffStream.Position;
rec = biffStream.Read();

// Stop if we find the start of a new substream. Files do not always have the required EOF before a substream BOF.
if (rec is XlsBiffBOF)
break;
}

if (header != null || footer != null)
Expand Down
57 changes: 18 additions & 39 deletions src/ExcelDataReader/Core/CsvFormat/CsvParser.cs
Expand Up @@ -12,12 +12,13 @@ internal class CsvParser
public CsvParser(char separator, Encoding encoding)
{
Separator = separator;
QuoteChar = '"';

Decoder = encoding.GetDecoder();
Decoder.Fallback = new DecoderExceptionFallback();

MaxCharBytes = encoding.GetMaxByteCount(1);
Buffer = new byte[MaxCharBytes];
var bufferSize = 1024;
CharBuffer = new char[bufferSize];

State = CsvState.PreValue;
}
Expand All @@ -35,21 +36,17 @@ private enum CsvState

private CsvState State { get; set; }

private char QuoteChar { get; set; }
private char QuoteChar { get; }

private int TrailingWhitespaceCount { get; set; }

private Decoder Decoder { get; }

private int MaxCharBytes { get; }

private bool HasCarriageReturn { get; set; }

private char Separator { get; }

private byte[] Buffer { get; set; }

private int BufferWritePosition { get; set; }
private char[] CharBuffer { get; set; }

private StringBuilder ValueResult { get; set; } = new StringBuilder();

Expand All @@ -59,20 +56,25 @@ private enum CsvState

public void ParseBuffer(byte[] bytes, int offset, int count, out List<List<string>> rows)
{
for (var i = 0; i < count; i++)
ParseByte(bytes[offset + i]);
while (count > 0)
{
Decoder.Convert(bytes, offset, count, CharBuffer, 0, CharBuffer.Length, false, out var bytesUsed, out var charsUsed, out var completed);

offset += bytesUsed;
count -= bytesUsed;

for (var i = 0; i < charsUsed; i++)
{
ParseChar(CharBuffer[i], 1);
}
}

rows = RowsResult;
RowsResult = new List<List<string>>();
}

public void Flush(out List<List<string>> rows)
{
while (BufferWritePosition > 0)
{
DecodeChar();
}

if (State != CsvState.PreValue)
{
AddValueToRow();
Expand All @@ -83,27 +85,6 @@ public void Flush(out List<List<string>> rows)
RowsResult = new List<List<string>>();
}

private void ParseByte(byte b)
{
Buffer[BufferWritePosition] = b;
BufferWritePosition++;

if (BufferWritePosition == MaxCharBytes)
{
DecodeChar();
}
}

private void DecodeChar()
{
var c = new char[1];
Decoder.Convert(Buffer, 0, BufferWritePosition, c, 0, 1, true, out var bytesUsed, out var charsUsed, out var completed);
ParseChar(c[0], bytesUsed);

Array.Copy(Buffer, bytesUsed, Buffer, 0, BufferWritePosition - bytesUsed);
BufferWritePosition -= bytesUsed;
}

private void ParseChar(char c, int bytesUsed)
{
var parsed = false;
Expand Down Expand Up @@ -141,9 +122,8 @@ private bool ReadPreValue(char c, int bytesUsed)
{
return true;
}
else if (c == '"' || c == '\'')
else if (c == QuoteChar)
{
QuoteChar = c;
State = CsvState.QuotedValue;
return true;
}
Expand Down Expand Up @@ -228,7 +208,6 @@ private bool ReadQuotedValueQuote(char c, int bytesUsed)
else
{
// End of quote, read remainder of field as a regular value until separator
QuoteChar = '\0';
State = CsvState.Value;
return false;
}
Expand Down
102 changes: 51 additions & 51 deletions src/ExcelDataReader/Core/OpenXmlFormat/XlsxWorkbook.cs
Expand Up @@ -59,6 +59,34 @@ public XlsxWorkbook(ZipWorker zipWorker)

public int ResultsCount => Sheets?.Count ?? -1;

/// <summary>
/// Builds the text of a string item by concatenating the content of its text elements
/// (<c>ElementT</c>) and of its rich text runs (<c>ElementR</c>), in document order.
/// </summary>
/// <param name="reader">
/// The XML reader to consume. NOTE(review): presumably positioned on the start tag of the
/// string item (e.g. a shared string or inline string element) - confirm against callers and
/// <c>XmlReaderHelper.ReadFirstContent</c>, which are not visible here.
/// </param>
/// <returns>
/// The concatenated text. An empty string is returned when
/// <c>XmlReaderHelper.ReadFirstContent</c> returns false or when no text element is found.
/// </returns>
public static string ReadStringItem(XmlReader reader)
{
string result = string.Empty;
if (!XmlReaderHelper.ReadFirstContent(reader))
{
// Nothing to read inside the item; report it as empty text.
return result;
}

while (!reader.EOF)
{
if (reader.IsStartElement(ElementT, NsSpreadsheetMl))
{
// A string item may contain several <t> elements; their text is concatenated.
result += reader.ReadElementContentAsString();
}
else if (reader.IsStartElement(ElementR, NsSpreadsheetMl))
{
// Rich text run: its text is collected by ReadRichTextRun and appended.
result += ReadRichTextRun(reader);
}
else if (!XmlReaderHelper.SkipContent(reader))
{
// Any other node is skipped; a false result from SkipContent ends the loop.
break;
}
}

return result;
}

public IEnumerable<XlsxWorksheet> ReadWorksheets()
{
foreach (var sheet in Sheets)
Expand All @@ -67,6 +95,29 @@ public IEnumerable<XlsxWorksheet> ReadWorksheets()
}
}

/// <summary>
/// Collects the text of a single rich text run by concatenating the content of every
/// text element (<c>ElementT</c>) found in it; all other nodes are skipped.
/// </summary>
/// <param name="reader">
/// The XML reader to consume. NOTE(review): presumably positioned on the start tag of the
/// run element - confirm against <c>XmlReaderHelper.ReadFirstContent</c>, which is not
/// visible here.
/// </param>
/// <returns>
/// The concatenated text. An empty string is returned when
/// <c>XmlReaderHelper.ReadFirstContent</c> returns false or when no text element is found.
/// </returns>
private static string ReadRichTextRun(XmlReader reader)
{
string result = string.Empty;
if (!XmlReaderHelper.ReadFirstContent(reader))
{
// Nothing to read inside the run; report it as empty text.
return result;
}

while (!reader.EOF)
{
if (reader.IsStartElement(ElementT, NsSpreadsheetMl))
{
// Append the text of this <t> element to what has been collected so far.
result += reader.ReadElementContentAsString();
}
else if (!XmlReaderHelper.SkipContent(reader))
{
// Any other node is skipped; a false result from SkipContent ends the loop.
break;
}
}

return result;
}

private void ReadWorkbook()
{
using (var stream = _zipWorker.GetWorkbookStream())
Expand Down Expand Up @@ -230,57 +281,6 @@ private void ReadSharedStrings(XmlReader reader)
}
}

private string ReadStringItem(XmlReader reader)
{
string result = string.Empty;
if (!XmlReaderHelper.ReadFirstContent(reader))
{
return result;
}

while (!reader.EOF)
{
if (reader.IsStartElement(ElementT, NsSpreadsheetMl))
{
// There are multiple <t> in a <si>. Concatenate <t> within an <si>.
result += reader.ReadElementContentAsString();
}
else if (reader.IsStartElement(ElementR, NsSpreadsheetMl))
{
result += ReadRichTextRun(reader);
}
else if (!XmlReaderHelper.SkipContent(reader))
{
break;
}
}

return result;
}

private string ReadRichTextRun(XmlReader reader)
{
string result = string.Empty;
if (!XmlReaderHelper.ReadFirstContent(reader))
{
return result;
}

while (!reader.EOF)
{
if (reader.IsStartElement(ElementT, NsSpreadsheetMl))
{
result += reader.ReadElementContentAsString();
}
else if (!XmlReaderHelper.SkipContent(reader))
{
break;
}
}

return result;
}

private void ReadStyles()
{
using (var stream = _zipWorker.GetStylesStream())
Expand Down
56 changes: 26 additions & 30 deletions src/ExcelDataReader/Core/OpenXmlFormat/XlsxWorksheet.cs
Expand Up @@ -270,23 +270,41 @@ private IEnumerable<XlsxRow> ReadSheetData(XmlReader xmlReader)
yield break;
}

Row row = null;

int nextRowIndex = 0;
while (!xmlReader.EOF)
{
if (xmlReader.IsStartElement(NRow, NsSpreadsheetMl))
{
var row = ReadRow(xmlReader, nextRowIndex);
nextRowIndex = row.RowIndex + 1;
yield return new XlsxRow()
var currentRow = ReadRow(xmlReader, nextRowIndex);

if (row == null)
{
row = currentRow;
}
else if (currentRow.RowIndex != row.RowIndex)
{
Row = row
};
yield return new XlsxRow { Row = row };
row = currentRow;
}
else
{
row.Cells.AddRange(currentRow.Cells);
}

nextRowIndex = currentRow.RowIndex + 1;
}
else if (!XmlReaderHelper.SkipContent(xmlReader))
{
break;
}
}

if (row != null)
{
yield return new XlsxRow { Row = row };
}
}

private XlsxMergeCells ReadMergeCells(XmlReader xmlReader)
Expand Down Expand Up @@ -457,7 +475,7 @@ private Cell ReadCell(XmlReader xmlReader, int nextColumnIndex)
}
else if (xmlReader.IsStartElement(NIs, NsSpreadsheetMl))
{
var rawValue = ReadInlineString(xmlReader);
var rawValue = XlsxWorkbook.ReadStringItem(xmlReader);
if (!string.IsNullOrEmpty(rawValue))
result.Value = ConvertCellValue(rawValue, aT, result.NumberFormatIndex);
}
Expand All @@ -470,30 +488,6 @@ private Cell ReadCell(XmlReader xmlReader, int nextColumnIndex)
return result;
}

private string ReadInlineString(XmlReader xmlReader)
{
string result = null;

if (!XmlReaderHelper.ReadFirstContent(xmlReader))
{
return result;
}

while (!xmlReader.EOF)
{
if (xmlReader.IsStartElement(NT, NsSpreadsheetMl))
{
result = xmlReader.ReadElementContentAsString();
}
else if (!XmlReaderHelper.SkipContent(xmlReader))
{
break;
}
}

return result;
}

private object ConvertCellValue(string rawValue, string aT, int numberFormatIndex)
{
const NumberStyles style = NumberStyles.Any;
Expand Down Expand Up @@ -521,6 +515,8 @@ private object ConvertCellValue(string rawValue, string aT, int numberFormatInde
return date;

return rawValue;
case "e": //// error
return null;
default:
if (double.TryParse(rawValue, style, invariantCulture, out double number))
{
Expand Down
2 changes: 1 addition & 1 deletion src/ExcelDataReader/ExcelBinaryReader.cs
Expand Up @@ -20,7 +20,7 @@ public ExcelBinaryReader(Stream stream, string password, Encoding fallbackEncodi
public override void Close()
{
base.Close();
Workbook.Stream?.Dispose();
Workbook?.Stream?.Dispose();
Workbook = null;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/ExcelDataReader/ExcelCsvReader.cs
Expand Up @@ -17,7 +17,7 @@ public ExcelCsvReader(Stream stream, Encoding fallbackEncoding, char[] autodetec
public override void Close()
{
base.Close();
Workbook.Stream?.Dispose();
Workbook?.Stream?.Dispose();
Workbook = null;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/ExcelDataReader/ExcelDataReader.csproj
Expand Up @@ -3,7 +3,7 @@
<PropertyGroup>
<Description>Lightweight and fast library written in C# for reading Microsoft Excel files (2.0-2007).</Description>
<AssemblyTitle>ExcelDataReader</AssemblyTitle>
<VersionPrefix>3.4.0</VersionPrefix>
<VersionPrefix>3.4.1</VersionPrefix>
<Authors>ExcelDataReader developers</Authors>
<TargetFrameworks>net20;net45;netstandard1.3;netstandard2.0</TargetFrameworks>
<AssemblyName>ExcelDataReader</AssemblyName>
Expand Down

0 comments on commit de60db2

Please sign in to comment.