Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix getproperty calls on fields for File #1081

Merged
Merged 3 commits on Apr 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 13 additions & 13 deletions src/file.jl
Expand Up @@ -900,8 +900,8 @@ function File(sources::Vector;
all(x -> x isa ValidSources, sources) || throw(ArgumentError("all provided sources must be one of: `$ValidSources`"))
kws = merge(values(kw), (ntasks=1,))
f = File(sources[1]; kws...)
rows = f.rows
for col in f.columns
rows = getrows(f)
for col in getcolumns(f)
col.column = ChainedVector([col.column])
end
files = Vector{File}(undef, length(sources) - 1)
Expand All @@ -910,32 +910,32 @@ function File(sources::Vector;
files[i - 1] = File(sources[i]; kws...)
end
end
lookup = f.lookup
lookup = getlookup(f)
for i = 2:length(sources)
f2 = files[i - 1]
rows += f2.rows
fl2 = f2.lookup
rows += getrows(f2)
fl2 = getlookup(f2)
for (nm, col) in lookup
if haskey(fl2, nm)
col.column = chaincolumns!(col.column, fl2[nm].column)
else
col.column = chaincolumns!(col.column, MissingVector(f2.rows))
col.column = chaincolumns!(col.column, MissingVector(getrows(f2)))
end
end
end
if source !== nothing
# add file name of each "partition" as 1st column
pushfirst!(files, f)
vals = source isa Pair ? source.second : [f.name for f in files]
vals = source isa Pair ? source.second : [getname(f) for f in files]
pool = Dict(x => UInt32(i) for (i, x) in enumerate(vals))
arr = PooledArray(PooledArrays.RefArray(ChainedVector([fill(UInt32(i), f.rows) for (i, f) in enumerate(files)])), pool)
arr = PooledArray(PooledArrays.RefArray(ChainedVector([fill(UInt32(i), getrows(f)) for (i, f) in enumerate(files)])), pool)
col = Column(eltype(arr))
col.column = arr
push!(f.columns, col)
push!(getcolumns(f), col)
colnm = Symbol(source isa Pair ? source.first : source)
push!(f.names, colnm)
push!(f.types, eltype(arr))
f.lookup[colnm] = col
push!(getnames(f), colnm)
push!(gettypes(f), eltype(arr))
getlookup(f)[colnm] = col
end
return File(f.name, f.names, f.types, rows, f.cols, f.columns, f.lookup)
return File(getname(f), getnames(f), gettypes(f), rows, getcols(f), getcolumns(f), getlookup(f))
end
5 changes: 5 additions & 0 deletions test/basics.jl
Expand Up @@ -820,4 +820,9 @@ f = CSV.File(IOBuffer(str); delim=" ", header=false, types=(i,nm) -> (i == 5 ? I
f = CSV.File(IOBuffer(str); delim=" ", header=false, types=Dict(r".*" => Float16))
@test Float16 <: eltype(f.Column5)

# Issue #1080
# Bug when reading multiple files where a column shares its name with a field of `File`
f = CSV.File(map(IOBuffer, ["name\n2\n", "name\n11\n"]))
@test f.name == [2, 11]

end