Skip to content

Commit

Permalink
Merge f215c69 into b71841b
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins committed Dec 14, 2017
2 parents b71841b + f215c69 commit 533497e
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 25 deletions.
8 changes: 6 additions & 2 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,12 @@ names!(df, [:a, :b, :a], allow_duplicates=true) # renames second :a to :a_1
```
"""
function names!(df::AbstractDataFrame, vals; allow_duplicates=false)
names!(index(df), vals; allow_duplicates=allow_duplicates)
function names!(df::AbstractDataFrame, vals; allow_duplicates=false, make_unique=false)
    # `allow_duplicates` is the deprecated spelling of `make_unique`: warn and
    # turn `make_unique` on so that existing callers keep their behavior.
    if allow_duplicates
        Base.depwarn("Keyword allow_duplicates is deprecated. Use make_unique.", :names!)
        make_unique = true
    end
    # The renaming itself is delegated to the `names!` method for `index(df)`.
    names!(index(df), vals, make_unique=make_unique)
    return df
end

Expand Down
92 changes: 79 additions & 13 deletions src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -596,10 +596,34 @@ Base.setindex!(df::DataFrame, x::Void, col_ind::Int) = delete!(df, col_ind)

Base.empty!(df::DataFrame) = (empty!(df.columns); empty!(index(df)); df)

function Base.insert!(df::DataFrame, col_ind::Int, item::AbstractVector, name::Symbol)
# TODO: add docstring of make_unique after 0.4 release of DataFrames
function Base.insert!(df::DataFrame, col_ind::Int, item::AbstractVector, name::Symbol;
make_unique::Bool=false)
0 < col_ind <= ncol(df) + 1 || throw(BoundsError())
size(df, 1) == length(item) || size(df, 1) == 0 || error("number of rows does not match")

if name in _names(df)
    if make_unique
        # Only the name of the inserted column is made unique; if df originally
        # had duplicates in names we do not fix them.
        k = 1
        candidate = Symbol("$(name)_$k")
        while candidate in _names(df)
            k += 1
            candidate = Symbol("$(name)_$k")
        end
        name = candidate
    else
        # TODO: after 0.4 release of DataFrames remove depwarn and uncomment ArgumentError below
        # The symbol given to depwarn has to name the enclosing function,
        # which is `insert!` (with the bang).
        Base.depwarn("Inserting duplicate column name is deprecated.", :insert!)
        # msg = """Duplicate variable name $(name).
        # Pass make_unique=true to make it unique using a suffix automatically."""
        # throw(ArgumentError(msg))
    end
end

insert!(index(df), col_ind, name)
insert!(df.columns, col_ind, item)
df
Expand All @@ -609,6 +633,31 @@ function Base.insert!(df::DataFrame, col_ind::Int, item, name::Symbol)
insert!(df, col_ind, upgrade_scalar(df, item), name)
end

"""
Merge `DataFrame`s
```julia
merge!(df::DataFrame, others::AbstractDataFrame...)
```
**Arguments**
* `df` : the DataFrame to merge into
* `others` : `AbstractDataFrame`s to be merged into `df`
**Result**
* `::DataFrame` : the updated result. Columns with duplicate names are overwritten.
**Examples**
```julia
df = DataFrame(id = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df2 = DataFrame(id = 11:20, z = rand(10))
merge!(df, df2) # column z is added, column id is overwritten
```
"""
function Base.merge!(df::DataFrame, others::AbstractDataFrame...)
for other in others
for n in _names(other)
Expand Down Expand Up @@ -698,7 +747,16 @@ end
##############################################################################

# hcat! for 2 arguments, only a vector or a data frame is allowed
function hcat!(df1::DataFrame, df2::AbstractDataFrame)
function hcat!(df1::DataFrame, df2::AbstractDataFrame; make_unique::Bool=false)
common = intersect(_names(df1), _names(df2))
if !make_unique && length(common) > 0
# TODO: after 0.4 release of DataFrames remove depwarn and uncomment ArgumentError below
Base.depwarn("Inserting duplicate column names is deprecated.", :hcat!)
# msg = """Duplicate variable names $(common).
# Pass make_unique=true to make them unique using a suffix automatically."""
# throw(ArgumentError(msg))

end
u = add_names(index(df1), index(df2))
for i in 1:length(u)
df1[u[i]] = df2[i]
Expand All @@ -707,27 +765,35 @@ function hcat!(df1::DataFrame, df2::AbstractDataFrame)
end

# definition required to avoid hcat! ambiguity
function hcat!(df1::DataFrame, df2::DataFrame)
invoke(hcat!, Tuple{DataFrame, AbstractDataFrame}, df1, df2)
function hcat!(df1::DataFrame, df2::DataFrame; make_unique::Bool=false)
    # Explicitly dispatch to the (DataFrame, AbstractDataFrame) method and
    # pass the keyword through unchanged.
    return invoke(hcat!, Tuple{DataFrame, AbstractDataFrame}, df1, df2;
                  make_unique=make_unique)
end

hcat!(df::DataFrame, x::AbstractVector) = hcat!(df, DataFrame(Any[x]))
hcat!(x::AbstractVector, df::DataFrame) = hcat!(DataFrame(Any[x]), df)
function hcat!(x, df::DataFrame)
function hcat!(df::DataFrame, x::AbstractVector; make_unique::Bool=false)
    # The vector is wrapped with `DataFrame(Any[x])` so that the data frame
    # method of `hcat!` can be reused.
    wrapped = DataFrame(Any[x])
    return hcat!(df, wrapped; make_unique=make_unique)
end

function hcat!(x::AbstractVector, df::DataFrame; make_unique::Bool=false)
    # Same as above with the vector on the left-hand side.
    wrapped = DataFrame(Any[x])
    return hcat!(wrapped, df; make_unique=make_unique)
end

# Any other left-hand argument is rejected.
hcat!(x, df::DataFrame; make_unique::Bool=false) =
    throw(ArgumentError("x must be AbstractVector or AbstractDataFrame"))
function hcat!(df::DataFrame, x)
# Any other right-hand argument is rejected.
hcat!(df::DataFrame, x; make_unique::Bool=false) =
    throw(ArgumentError("x must be AbstractVector or AbstractDataFrame"))

# hcat! for 1-n arguments
hcat!(df::DataFrame) = df
hcat!(a::DataFrame, b, c...) = hcat!(hcat!(a, b), c...)
function hcat!(df::DataFrame; make_unique::Bool=false)
    # A single data frame has nothing to be concatenated with.
    return df
end

function hcat!(a::DataFrame, b, c...; make_unique::Bool=false)
    # Left fold: combine `a` with `b` first, then continue with the remaining
    # arguments, passing the same `make_unique` setting at every step.
    combined = hcat!(a, b; make_unique=make_unique)
    return hcat!(combined, c...; make_unique=make_unique)
end

# hcat
Base.hcat(df::DataFrame, x) = hcat!(copy(df), x)
Base.hcat(df1::DataFrame, df2::AbstractDataFrame) = hcat!(copy(df1), df2)
Base.hcat(df1::DataFrame, df2::AbstractDataFrame, dfn::AbstractDataFrame...) = hcat!(hcat(df1, df2), dfn...)
# TODO: add docstring of make_unique after 0.4 release of DataFrames
function Base.hcat(df::DataFrame, x; make_unique::Bool=false)
    # The mutating `hcat!` is applied to `copy(df)` rather than to `df`.
    return hcat!(copy(df), x; make_unique=make_unique)
end

function Base.hcat(df1::DataFrame, df2::AbstractDataFrame; make_unique::Bool=false)
    return hcat!(copy(df1), df2; make_unique=make_unique)
end

function Base.hcat(df1::DataFrame, df2::AbstractDataFrame, dfn::AbstractDataFrame...;
                   make_unique::Bool=false)
    # The first two data frames are combined with `hcat`; the remaining ones
    # are then appended to that result with `hcat!`.
    first_two = hcat(df1, df2; make_unique=make_unique)
    return hcat!(first_two, dfn...; make_unique=make_unique)
end

##############################################################################
##
Expand Down
13 changes: 9 additions & 4 deletions src/other/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ mutable struct Index <: AbstractIndex # an OrderedDict would be nice here...
lookup::Dict{Symbol, Int} # name => names array position
names::Vector{Symbol}
end
function Index(names::Vector{Symbol}; allow_duplicates=true)
u = make_unique(names, allow_duplicates=allow_duplicates)
function Index(names::Vector{Symbol}; make_unique=false)
    # `_make_unique` receives the `make_unique` flag unchanged; its result is
    # used as the list of column names.
    unique_names = _make_unique(names; make_unique=make_unique)
    # Build the name => position lookup table for the resulting names.
    positions = Dict{Symbol, Int}(zip(unique_names, 1:length(unique_names)))
    return Index(positions, unique_names)
end
Expand All @@ -22,11 +22,16 @@ Base.isequal(x::Index, y::Index) = isequal(x.lookup, y.lookup) && isequal(x.name
# Imported in DataFrames.jl for compatibility across Julia 0.4 and 0.5
Base.:(==)(x::Index, y::Index) = isequal(x, y)

function names!(x::Index, nms::Vector{Symbol}; allow_duplicates=false)
# TODO: after DataFrames release 0.4 change docstring of names!
function names!(x::Index, nms::Vector{Symbol}; allow_duplicates=false, make_unique=false)
if allow_duplicates
Base.depwarn("Keyword allow_duplicates is deprecated. Use make_unique.", :names!)
make_unique = allow_duplicates
end
if length(nms) != length(x)
throw(ArgumentError("Length of nms doesn't match length of x."))
end
newindex = Index(nms, allow_duplicates=allow_duplicates)
newindex = Index(nms, make_unique=make_unique)
x.names = newindex.names
x.lookup = newindex.lookup
return x
Expand Down
6 changes: 3 additions & 3 deletions src/other/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ function makeidentifier(s::AbstractString)
return String(take!(res))
end

function make_unique(names::Vector{Symbol}; allow_duplicates=true)
function _make_unique(names::Vector{Symbol}; make_unique=true)
seen = Set{Symbol}()
names = copy(names)
dups = Int[]
Expand All @@ -58,10 +58,10 @@ function make_unique(names::Vector{Symbol}; allow_duplicates=true)
in(name, seen) ? push!(dups, i) : push!(seen, name)
end

if !allow_duplicates && length(dups) > 0
if !make_unique && length(dups) > 0
d = unique(names[dups])
msg = """Duplicate variable names: $d.
Pass allow_duplicates=true to make them unique using a suffix automatically."""
Pass make_unique=true to make them unique using a suffix automatically."""
throw(ArgumentError(msg))
end

Expand Down
8 changes: 8 additions & 0 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,14 @@ module TestDataFrame
@test df[:b] == [3.0, 4.0]
@test df[:newcol] == ["a", "b"]

# Inserting under an already used name with make_unique=true: the new column
# is stored as :newcol_1 and the existing columns keep their names and values.
@test insert!(df, 1, ["a1", "b1"], :newcol, make_unique=true) == df
@test names(df) == [:newcol_1, :newcol, :a, :b]
@test df[:a] == [1, 2]
@test df[:b] == [3.0, 4.0]
@test df[:newcol] == ["a", "b"]
@test df[:newcol_1] == ["a1", "b1"]


df = DataFrame(a=[1, 2], b=[3.0, 4.0])
df2 = DataFrame(b=["a", "b"], c=[:c, :d])
@test merge!(df, df2) == df
Expand Down
6 changes: 3 additions & 3 deletions test/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ module TestUtils
@test identifier("begin") == :_begin
@test identifier("end") == :_end

@test DataFrames.make_unique([:x, :x, :x_1, :x2]) == [:x, :x_2, :x_1, :x2]
@test_throws ArgumentError DataFrames.make_unique([:x, :x, :x_1, :x2], allow_duplicates=false)
@test DataFrames.make_unique([:x, :x_1, :x2], allow_duplicates=false) == [:x, :x_1, :x2]
# `_make_unique` takes the `make_unique` keyword; with `make_unique=false`
# duplicate names must raise an ArgumentError, and unique names pass through.
@test DataFrames._make_unique([:x, :x, :x_1, :x2]) == [:x, :x_2, :x_1, :x2]
@test_throws ArgumentError DataFrames._make_unique([:x, :x, :x_1, :x2], make_unique=false)
@test DataFrames._make_unique([:x, :x_1, :x2], make_unique=false) == [:x, :x_1, :x2]

# Check that reserved words are up to date

Expand Down

0 comments on commit 533497e

Please sign in to comment.