From c828b23f138213067cc5991ed3bb01caf55fa675 Mon Sep 17 00:00:00 2001 From: Albin Heimerson Date: Thu, 20 Apr 2023 21:39:58 +0200 Subject: [PATCH 1/3] add test that fails --- test/basics.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/basics.jl b/test/basics.jl index 71a7c78f..9a0af50d 100644 --- a/test/basics.jl +++ b/test/basics.jl @@ -820,4 +820,9 @@ f = CSV.File(IOBuffer(str); delim=" ", header=false, types=(i,nm) -> (i == 5 ? I f = CSV.File(IOBuffer(str); delim=" ", header=false, types=Dict(r".*" => Float16)) @test Float16 <: eltype(f.Column5) +# 1080 +# bug in reading multiple files when a column shares name with a field in File +f = CSV.File(map(IOBuffer, ["name\n2\n", "name\n11\n"])) +@test f.name == [2, 11] + end From a5744896b792ea9fd1800394a6eee2acf7abbd03 Mon Sep 17 00:00:00 2001 From: Albin Heimerson Date: Thu, 20 Apr 2023 22:21:22 +0200 Subject: [PATCH 2/3] use getters for File field access --- src/file.jl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/file.jl b/src/file.jl index 239f975e..2bf9e93b 100644 --- a/src/file.jl +++ b/src/file.jl @@ -900,8 +900,8 @@ function File(sources::Vector; all(x -> x isa ValidSources, sources) || throw(ArgumentError("all provided sources must be one of: `$ValidSources`")) kws = merge(values(kw), (ntasks=1,)) f = File(sources[1]; kws...) - rows = f.rows - for col in f.columns + rows = getrows(f) + for col in getcolumns(f) col.column = ChainedVector([col.column]) end files = Vector{File}(undef, length(sources) - 1) @@ -910,16 +910,16 @@ function File(sources::Vector; files[i - 1] = File(sources[i]; kws...) end end - lookup = f.lookup + lookup = getlookup(f) for i = 2:length(sources) f2 = files[i - 1] - rows += f2.rows - fl2 = f2.lookup + rows += getrows(f2) + fl2 = getlookup(f2) for (nm, col) in lookup if haskey(fl2, nm) col.column = chaincolumns!(col.column, fl2[nm].column) else - col.column = chaincolumns!(col.column, MissingVector(f2.rows)) + col.column = chaincolumns!(col.column, MissingVector(getrows(f2))) end end end @@ -928,14 +928,14 @@ function File(sources::Vector; pushfirst!(files, f) vals = source isa Pair ? source.second : [f.name for f in files] pool = Dict(x => UInt32(i) for (i, x) in enumerate(vals)) - arr = PooledArray(PooledArrays.RefArray(ChainedVector([fill(UInt32(i), f.rows) for (i, f) in enumerate(files)])), pool) + arr = PooledArray(PooledArrays.RefArray(ChainedVector([fill(UInt32(i), getrows(f)) for (i, f) in enumerate(files)])), pool) col = Column(eltype(arr)) col.column = arr - push!(f.columns, col) + push!(getcolumns(f), col) colnm = Symbol(source isa Pair ? source.first : source) - push!(f.names, colnm) - push!(f.types, eltype(arr)) + push!(getnames(f), colnm) + push!(gettypes(f), eltype(arr)) f.lookup[colnm] = col end - return File(f.name, f.names, f.types, rows, f.cols, f.columns, f.lookup) + return File(getname(f), getnames(f), gettypes(f), rows, getcols(f), getcolumns(f), getlookup(f)) end From 0f81c9e21bcdeb81c2d3e5ecf37edfee2aca10a7 Mon Sep 17 00:00:00 2001 From: Albin Heimerson Date: Fri, 21 Apr 2023 07:42:44 +0200 Subject: [PATCH 3/3] fix two missed spots using getproperty --- src/file.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file.jl b/src/file.jl index 2bf9e93b..2d0c28c9 100644 --- a/src/file.jl +++ b/src/file.jl @@ -926,7 +926,7 @@ function File(sources::Vector; if source !== nothing # add file name of each "partition" as 1st column pushfirst!(files, f) - vals = source isa Pair ? source.second : [f.name for f in files] + vals = source isa Pair ? source.second : [getname(f) for f in files] pool = Dict(x => UInt32(i) for (i, x) in enumerate(vals)) arr = PooledArray(PooledArrays.RefArray(ChainedVector([fill(UInt32(i), getrows(f)) for (i, f) in enumerate(files)])), pool) col = Column(eltype(arr)) @@ -935,7 +935,7 @@ function File(sources::Vector; colnm = Symbol(source isa Pair ? source.first : source) push!(getnames(f), colnm) push!(gettypes(f), eltype(arr)) - f.lookup[colnm] = col + getlookup(f)[colnm] = col end return File(getname(f), getnames(f), gettypes(f), rows, getcols(f), getcolumns(f), getlookup(f)) end