Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@

10. `fread()` no longer misreads dates with negative years, [#7704](https://github.com/Rdatatable/data.table/issues/7704). Thanks to @kevinushey for the report and @aitap for the fix.

11. `fread()` would not give a warning when every second line of input was empty, [#3339](https://github.com/Rdatatable/data.table/issues/3339). Now, the warning 'The rows in this file appear to be separated by blank lines.' is given, and it suggests setting `blank.lines.skip` to `TRUE`. Thanks to @Henrik-P for the report and @Asa-Henry for the fix.

### Notes

1. {data.table} now depends on R 3.5.0 (2018).
Expand Down
20 changes: 11 additions & 9 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -8059,20 +8059,22 @@ test(1577.3, levels(X$b), character(0))

# FR #530, skip blank lines
input = "Header not 2 columns\n\n1,3\n2,4"
test(1578.1, fread(input), data.table(V1=1:2, V2=3:4))
test(1578.01, fread(input), data.table(V1=1:2, V2=3:4))
input = "a,b\n\n1,3\n2,4"
test(1578.2, fread(input), data.table(V1=1:2, V2=3:4)) # the block of 2x2 dominates the one line with sep in auto-removed header section
test(1578.3, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4))
test(1578.02, fread(input), data.table(V1=1:2, V2=3:4)) # the block of 2x2 dominates the one line with sep in auto-removed header section
test(1578.03, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4))
input = "a,b\n\n\n1,3\n2,4"
test(1578.4, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4))
test(1578.04, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4))
input = "a,b\n\n\n1,3\n\n2,4\n\n"
test(1578.5, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4))
test(1578.05, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4))

f = testDir("530_fread.txt")
test(1578.6, fread(f, skip=47L, verbose=TRUE), data.table(V1=1:2, V2=3:4), output="Positioned on line 48 starting: <<a,b>>")
test(1578.7, fread(f, skip=49L), data.table(V1=1:2, V2=3:4))
test(1578.8, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4))
test(1578.9, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50
test(1578.06, fread(f, skip=47L, verbose=TRUE), data.table(V1=1:2, V2=3:4), output="Positioned on line 48 starting: <<a,b>>")
test(1578.07, fread(f, skip=49L), data.table(V1=1:2, V2=3:4))
test(1578.08, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4))
test(1578.09, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50
input = "x y\n\n1 a\n\n2 b\n\n3 c"
test(1578.10, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.")

# test 1579 moved to optimize.Rraw

Expand Down
5 changes: 5 additions & 0 deletions src/fread.c
Original file line number Diff line number Diff line change
Expand Up @@ -1847,6 +1847,7 @@ int freadMain(freadMainArgs _args)
int topNumFields = 1; // how many fields that was, to resolve ties
enum quote_rule_t topQuoteRule = -1; // which quote rule that was
int topSkip = 0; // how many rows to auto-skip
// #7707 'topSkip' accumulates as blank lines are encountered, so it can be used to distinguish a file whose header and data are separated by a blank line from a file in which blocks of lines (or every line) are separated by blank lines
const char *topStart = NULL;

for (quoteRule = quote ? QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED : QUOTE_RULE_IGNORE_QUOTES; quoteRule < QUOTE_RULE_COUNT; quoteRule++) { // #loop_counter_not_local_scope_ok
Expand Down Expand Up @@ -1950,6 +1951,10 @@ int freadMain(freadMainArgs _args)
}
}
}
if (!prevStart && topSkip > 1 && !skipEmptyLines)
{
DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n"));
}
if (!firstJumpEnd) {
if (verbose) DTPRINT(_(" No sep and quote rule found a block of 2x2 or greater. Single column input.\n"));
topNumFields = 1;
Expand Down
Loading