diff --git a/NEWS.md b/NEWS.md index 021ddbbb6..5d9e163ad 100644 --- a/NEWS.md +++ b/NEWS.md @@ -50,6 +50,8 @@ 9. `fread()` no longer replaces a literal header column name `"NA"` with an auto-generated `Vn` name when `na.strings` includes `"NA"`, [#5124](https://github.com/Rdatatable/data.table/issues/5124). Data rows still continue to parse `"NA"` as missing. Thanks @Mashin6 for the report and @shrektan for the fix. +10. `fread()` no longer misreads dates with negative years, [#7704](https://github.com/Rdatatable/data.table/issues/7704). Thanks to @kevinushey for the report and @aitap for the fix. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 443487c6a..1b5ea162a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21585,3 +21585,9 @@ close(con) file.create(f <- tempfile()) test(2367.6, fread(file(f)), data.table(), warning="Connection has size 0.") unlink(f) + +# negative years caused UB (negative `year % 400` used as an index into cumDaysCycleYears) in the date offset calculation, #7704 +x = fread("x\n-1-01-01")$x +test(2368.1, year(x), -1L) +test(2368.2, month(x), 1L) +test(2368.3, mday(x), 1L) diff --git a/src/fread.c b/src/fread.c index 2902dfc50..70125ec32 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1087,9 +1087,13 @@ static void parse_iso8601_date_core(const char **pch, int32_t *target) if (day == NA_INT32 || day < 1 || (day > (isLeapYear ? leapYearDays[month - 1] : normYearDays[month - 1]))) return; + int32_t cycle_year = year % 400; + if (cycle_year < 0) cycle_year += 400; + int32_t cycle = (year - cycle_year) / 400; + *target = - (year / 400 - 4) * cumDaysCycleYears[400] + // days to beginning of 400-year cycle - cumDaysCycleYears[year % 400] + // days to beginning of year within 400-year cycle + (cycle - 4) * cumDaysCycleYears[400] + // days to beginning of 400-year cycle + cumDaysCycleYears[cycle_year] + // days to beginning of year within 400-year cycle (isLeapYear ?
cumDaysCycleMonthsLeap[month - 1] : cumDaysCycleMonthsNorm[month - 1]) + // days to beginning of month within year day - 1; // day within month (subtract 1: 1970-01-01 -> 0)