Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Selectively reduce multithreaded parsing @error #1099

Merged
merged 1 commit into from Jun 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/context.jl
Expand Up @@ -649,9 +649,10 @@ end
debug && println("single-threaded estimated rows = $origrowsguess, multi-threaded estimated rows = $rowsguess")
debug && println("multi-threaded column types sampled as: $columns")
else
@error "Multi-threaded parsing failed (are there newlines inside quoted fields?), falling back to single-threaded parsing"
# The following debug statement is echoed by a loud @warn or @error in parsefilechunk!
debug && println("multi-threaded parsing failed! Falling back to single thread, reinitializing column types.")
reinitialize_column_type!(columns, types, names, stringtype, streaming)
threaded = false
threaded = false # the failing is signaled by having !ctx.threaded && ctx.ntasks > 1
end
end
if !threaded
Expand Down
9 changes: 9 additions & 0 deletions src/file.jl
Expand Up @@ -564,6 +564,15 @@ function parsefilechunk!(ctx::Context, pos, len, rowsguess, rowoffset, columns,
rowsguess = newrowsguess
end
end
if !ctx.threaded && ctx.ntasks > 1 && !ctx.silencewarnings
# `!ctx.threaded && ctx.ntasks > 1` indicates that multithreaded parsing failed.
# These messages echo the corresponding debug statement in the definition of ctx
if numwarnings[] > 0
@warn "Multithreaded parsing failed and fell back to single-threaded parsing, check previous warnings for possible reasons."
else
@error "Multithreaded parsing failed and fell back to single-threaded parsing. This can happen if the input contains multi-line fields; otherwise, please report this issue."
end
end
end
# done parsing (at least this chunk), so resize columns to final row count
for col in columns
Expand Down
13 changes: 13 additions & 0 deletions test/testfiles.jl
Expand Up @@ -709,3 +709,16 @@ testfile("test_basic.csv", (types=Dict(2=>Float64),),
(col1 = [1, 4, 7], col2 = [2.0, 5.0, 8.0], col3 = [3, 6, 9]);
dir=Path(dir)
)

# https://github.com/JuliaData/CSV.jl/pull/1099
# Regression test: a quoted field containing many newlines and delimiters.
# The file is read with `ntasks=3`; per the notice below, the run is expected to log
# a "Multithreaded parsing failed..." error while still yielding the full table.
@info "The following test is expected to @error with \"Multithreaded parsing failed...\""
testfile(
    "test_multiline_field_errorwarning.csv",
    (ntasks=3,),
    (20, 3),  # 19 regular rows plus the one multi-line row, 3 columns
    NamedTuple{(:col1, :col2, :col3), Tuple{String3, String, Int}},
    let rows = 1:19, pos = 14
        # Regular rows: A1..A19, .1...19, 1..19
        labels = [String3("A$i") for i in rows]
        texts = [".$i" for i in rows]
        counts = collect(rows)
        # The irregular row sits at position 14 of the expected columns.
        multiline = "a field to thwart all heuristics\n ,,,\n, ,\n , ,,\n, ,,\n , ,,\n,,\n ,,\n , ,\n , ,\n ,, , ,\n , ,,,\n, ,,\n\n, , , ,\n , ,\n\n ,,,\n,,,\n,,,\n ,,,\n\n,\n,\n"
        insert!(labels, pos, String3("foo"))
        insert!(texts, pos, multiline)
        insert!(counts, pos, 0)
        (col1 = labels, col2 = texts, col3 = counts)
    end
)
45 changes: 45 additions & 0 deletions test/testfiles/test_multiline_field_errorwarning.csv
@@ -0,0 +1,45 @@
col1,col2,col3
A1,.1,1
A2,.2,2
A3,.3,3
A4,.4,4
A5,.5,5
A6,.6,6
A7,.7,7
A8,.8,8
A9,.9,9
A10,.10,10
A11,.11,11
A12,.12,12
A13,.13,13
foo,"a field to thwart all heuristics
,,,
, ,
, ,,
, ,,
, ,,
,,
,,
, ,
, ,
,, , ,
, ,,,
, ,,

, , , ,
, ,

,,,
,,,
,,,
,,,

,
,
",-0
A14,.14,14
A15,.15,15
A16,.16,16
A17,.17,17
A18,.18,18
A19,.19,19