Skip to content

Commit

Permalink
Fix vcat to handle type promotion and preserve PooledDataArrays (PDAs)
Browse files Browse the repository at this point in the history
  • Loading branch information
garborg committed Dec 1, 2014
1 parent 23e090d commit 2293996
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 81 deletions.
51 changes: 35 additions & 16 deletions src/abstractdataframe/abstractdataframe.jl
Expand Up @@ -317,37 +317,56 @@ Base.vcat(df::AbstractDataFrame) = df
# Varargs form: gather the data frames into a vector and hand off to the
# vector method of `vcat`, which does the actual concatenation.
function Base.vcat(dfs::AbstractDataFrame...)
    return vcat(collect(dfs))
end

# Vertically concatenate a vector of data frames into a new `DataFrame`.
#
# The result has one column for every distinct column name reported by
# `_colinfo(dfs)`, with the element type `_colinfo` computed for that name.
# Each output column is allocated with `similar` on the first column seen
# under that name, so the container kind of that first column is what gets
# reused (e.g. a pooled column stays pooled -- presumably; depends on how
# `similar` is defined for the column type).
#
# Frames that lack a given column contribute no values for their rows; those
# rows keep whatever `similar` initialised them to (expected to be NA for
# data-array columns -- TODO confirm for other column types).
#
# Arguments:
#   dfs -- vector of data frames to stack, in order.
# Returns:
#   A new `DataFrame` with `sum(nrow, dfs)` rows. An empty `dfs` yields an
#   empty `DataFrame()` instead of failing on `dfs[1]`.
function Base.vcat{T<:AbstractDataFrame}(dfs::Vector{T})
    # Nothing to concatenate; `_colinfo` would otherwise index `dfs[1]`.
    isempty(dfs) && return DataFrame()

    # Column metadata is computed once here; no separate name/eltype scan
    # or throwaway column allocation is needed.
    coltyps, colnams, firstcols = _colinfo(dfs)

    res = DataFrame()
    Nrow = sum(nrow, dfs)
    for j in 1:length(colnams)
        colnam = colnams[j]
        col = similar(firstcols[j], coltyps[j], Nrow)

        # `i` is the 1-based row offset in `col` where the current frame's
        # rows begin; it advances even when the frame lacks this column.
        i = 1
        for df in dfs
            if haskey(df, colnam)
                copy!(col, i, df[colnam])
            end
            i += size(df, 1)
        end

        res[colnam] = col
    end
    res
end

# Collect per-column metadata across all data frames in `dfs`.
#
# Starting from the first frame's index, element types and columns, every
# later frame is scanned column by column:
#   * a name already known has its recorded element type widened with
#     `promote_type` when the incoming type differs;
#   * a name not yet known is appended, together with its element type and
#     the column itself (the first column seen under that name).
#
# Returns the tuple `(coltyps, colnams, firstcols)`, all in the same order.
function _colinfo{T<:AbstractDataFrame}(dfs::Vector{T})
    head = dfs[1]
    colindex = copy(index(head))
    coltyps = eltypes(head)
    firstcols = collect(columns(head))

    for k in 2:length(dfs)
        frame = dfs[k]
        for (cn, ct) in zip(names(frame), eltypes(frame))
            if !haskey(colindex, cn)
                # First time this column name appears: register it.
                push!(colindex, cn)
                push!(coltyps, ct)
                push!(firstcols, frame[cn])
                continue
            end

            # Known column: widen the recorded type only if it differs.
            pos = colindex[cn]
            seen = coltyps[pos]
            seen == ct || (coltyps[pos] = promote_type(seen, ct))
        end
    end

    return coltyps, names(colindex), firstcols
end

##############################################################################
##
## Hashing
Expand Down
88 changes: 88 additions & 0 deletions test/cat.jl
@@ -0,0 +1,88 @@
# Tests for horizontal (hcat) and vertical (vcat) concatenation of data frames.
# Statements run top to bottom and later sections reuse (and mutate) fixtures
# defined earlier, so ordering matters.
module TestCat
using Base.Test
using DataFrames

#
# hcat
#

# Shared fixtures: an integer and a string data vector, each with NA in slot 3.
dvint = @data([1, 2, NA, 4])
dvstr = @data(["one", "two", NA, "four"])

df2 = DataFrame(Any[dvint, dvstr])
df3 = DataFrame(Any[dvint])
df4 = convert(DataFrame, [1:4 1:4])
df5 = DataFrame(Any[@data([1,2,3,4]), dvstr])

# Clashing column names are expected to get a numeric suffix (:x1 -> :x1_1).
dfh = hcat(df3, df4)
@test size(dfh, 2) == 3
@test names(dfh) == [:x1, :x1_1, :x2]
@test isequal(dfh[:x1], df3[:x1])
@test isequal(dfh, [df3 df4])

# Three-way hcat must agree with chaining two-way hcat calls.
dfh3 = hcat(df3, df4, df5)
@test names(dfh3) == [:x1, :x1_1, :x2, :x1_2, :x2_1]
@test isequal(dfh3, hcat(dfh, df5))

#
# vcat
#

null_df = DataFrame(Int, 0, 0)
df = DataFrame(Int, 4, 3)

# Assignment of rows
df[1, :] = df[1, :]
df[1:2, :] = df[1:2, :]

# Broadcasting assignment of rows
df[1, :] = 1

# Assignment of columns
df[1] = zeros(4)

# Broadcasting assignment of columns
df[:, 1] = 1
df[1] = 3
df[:x3] = 2

# Smoke tests: these calls carry no assertions; they only need to not throw.
vcat(null_df)
vcat(null_df, null_df)
vcat(null_df, df)
vcat(df, null_df)
vcat(df, df)
vcat(df, df, df)

alt_df = deepcopy(df)
vcat(df, alt_df)
df[1] = zeros(Int, nrow(df))
# Column 1 of `df` was just replaced, so it may no longer match `alt_df`.
# NOTE(review): this was labelled "Fail on non-matching types", but the call is
# not wrapped in @test_throws, so it has to succeed for this file to pass.
vcat(df, alt_df)

alt_df = deepcopy(df)
names!(alt_df, [:A, :B, :C])
# `alt_df` now has different column names than `df`.
# NOTE(review): this was labelled "Fail on non-matching names", but the call is
# not wrapped in @test_throws, so it has to succeed for this file to pass.
vcat(df, alt_df)

# Stacking a frame on itself doubles the rows and keeps the column names.
dfr = vcat(df4, df4)
@test size(dfr, 1) == 8
@test names(df4) == names(dfr)
@test isequal(dfr, [df4, df4])

# df3 has only one column, so the rows it contributes to :x2 must be NA.
dfr = vcat(df2, df3)
@test size(dfr) == (8,2)
@test names(df2) == names(dfr)
@test isna(dfr[8,:x2])

# Element types are promoted across frames: Int + Float64 -> Float64.
@test eltypes(vcat(DataFrame(a = [1]), DataFrame(a = [2.1]))) == [Float64]

# Pooled columns: the stacked column must equal the pooled concatenation.
# NOTE(review): `==` presumably compares contents only; confirm whether this
# also verifies that the result is still a pooled array.
dfa = DataFrame(a = @pdata([1, 2, 2]))
dfb = DataFrame(a = @pdata([2, 3, 4]))
# dfc = DataFrame(a = @data([2, 3, 4]))
# dfd = DataFrame(Any[2:4], [:a])
@test vcat(dfa, dfb)[:a] == @pdata([1, 2, 2, 2, 3, 4])
# @test vcat(dfa, dfc)[:a] == @data([1, 2, 2, 2, 3, 4])
# @test vcat(dfc, dfd) == vcat(dfd, dfc)
# ^^ if/when container promotion happens in Base/DataArrays

end
22 changes: 0 additions & 22 deletions test/data.jl
Expand Up @@ -41,28 +41,6 @@ module TestData
@test size(head(df6,2)) == (2, 3)
# lots more to do

#test_group("hcat")
dfc = hcat(df3, df4)
@test size(dfc, 2) == 3
@test names(dfc) == [:x1, :x1_1, :x2]
@test isequal(dfc[:x1], df3[:x1])
@test isequal(dfc, [df3 df4])

dfc3 = hcat(df3, df4, df5)
@test names(dfc3) == [:x1, :x1_1, :x2, :x1_2, :x2_1]
@test isequal(dfc3, hcat(dfc, df5))

#test_group("vcat")
dfr = vcat(df4, df4)
@test size(dfr, 1) == 8
@test names(df4) == names(dfr)
@test isequal(dfr, [df4, df4])

dfr = vcat(df2, df3)
@test size(dfr) == (8,2)
@test names(df2) == names(dfr)
@test isna(dfr[8,:x2])

#test_group("assign")
df6[3] = @data(["un", "deux", "troix", "quatre"])
@test df6[1, 3] == "un"
Expand Down
2 changes: 1 addition & 1 deletion test/runtests.jl
Expand Up @@ -10,6 +10,7 @@ using Base.Test
using DataFrames

my_tests = ["utils.jl",
"cat.jl",
"data.jl",
"index.jl",
"dataframe.jl",
Expand All @@ -22,7 +23,6 @@ my_tests = ["utils.jl",
"sort.jl",
"grouping.jl",
"join.jl",
"vcat.jl",
"iteration.jl",
"duplicates.jl",
"show.jl"]
Expand Down
42 changes: 0 additions & 42 deletions test/vcat.jl

This file was deleted.

0 comments on commit 2293996

Please sign in to comment.