From 8e1dcb617200a4ca18575dba8ea18ed22474c470 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Fri, 27 Oct 2017 21:46:28 +0200 Subject: [PATCH] Improve rename API Using pairs is more convenient, more flexible and more consistent. --- docs/src/man/joins.md | 4 +-- src/abstractdataframe/abstractdataframe.jl | 30 ++++++++++++++-------- src/deprecated.jl | 7 +++++ src/other/index.jl | 8 +++--- test/dataframe.jl | 21 +++++++++++++-- test/index.jl | 8 +++--- 6 files changed, 54 insertions(+), 24 deletions(-) diff --git a/docs/src/man/joins.md b/docs/src/man/joins.md index 94c7e8452e..c99ee7c66d 100644 --- a/docs/src/man/joins.md +++ b/docs/src/man/joins.md @@ -56,7 +56,7 @@ In order to join data tables on keys which have different names, you must first ```julia a = DataFrame(ID = [20, 40], Name = ["John Doe", "Jane Doe"]) b = DataFrame(IDNew = [20, 40], Job = ["Lawyer", "Doctor"]) -rename!(b, :IDNew, :ID) +rename!(b, :IDNew => :ID) join(a, b, on = :ID, kind = :inner) ``` @@ -69,6 +69,6 @@ a = DataFrame(City = ["Amsterdam", "London", "London", "New York", "New York"], b = DataFrame(Location = ["Amsterdam", "London", "London", "New York", "New York"], Work = ["Lawyer", "Lawyer", "Lawyer", "Doctor", "Doctor"], Name = ["a", "b", "c", "d", "e"]) -rename!(b, [:Location => :City, :Work => :Job]) +rename!(b, :Location => :City, :Work => :Job) join(a, b, on = [:City, :Job]) ``` diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 1ff4ee4246..a2a503ecf2 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -138,28 +138,35 @@ function rename!(df::AbstractDataFrame, args...) rename!(index(df), args...) return df end -rename!(f::Function, df::AbstractDataFrame) = rename!(df, f) +function rename!(f::Function, df::AbstractDataFrame) + rename!(f, index(df)) + return df +end rename(df::AbstractDataFrame, args...) = rename!(copy(df), args...) 
-rename(f::Function, df::AbstractDataFrame) = rename(df, f) +rename(f::Function, df::AbstractDataFrame) = rename!(f, copy(df)) """ Rename columns ```julia -rename!(df::AbstractDataFrame, from::Symbol, to::Symbol) -rename!(df::AbstractDataFrame, d::Associative) +rename!(df::AbstractDataFrame, (from => to)::Pair{Symbol, Symbol}...) +rename!(df::AbstractDataFrame, d::Associative{Symbol,Symbol}) +rename!(df::AbstractDataFrame, d::AbstractArray{Pair{Symbol,Symbol}}) rename!(f::Function, df::AbstractDataFrame) -rename(df::AbstractDataFrame, from::Symbol, to::Symbol) +rename(df::AbstractDataFrame, (from => to)::Pair{Symbol, Symbol}...) +rename(df::AbstractDataFrame, d::Associative{Symbol,Symbol}) +rename(df::AbstractDataFrame, d::AbstractArray{Pair{Symbol,Symbol}}) rename(f::Function, df::AbstractDataFrame) ``` **Arguments** * `df` : the AbstractDataFrame -* `d` : an Associative type that maps the original name to a new name -* `f` : a function that has the old column name (a symbol) as input - and new column name (a symbol) as output +* `d` : an Associative type or an AbstractArray of pairs that maps + the original names to new names +* `f` : a function which for each column takes the old name (a Symbol) + and returns the new name (a Symbol) **Result** @@ -169,10 +176,11 @@ rename(f::Function, df::AbstractDataFrame) ```julia df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10)) +rename(df, :i => :A, :x => :X) +rename(df, [:i => :A, :x => :X]) +rename(df, Dict(:i => :A, :x => :X)) rename(x -> Symbol(uppercase(string(x))), df) -rename(df, Dict(:i=>:A, :x=>:X)) -rename(df, :y, :Y) -rename!(df, Dict(:i=>:A, :x=>:X)) +rename!(df, Dict(:i => :A, :x => :X)) ``` """ diff --git a/src/deprecated.jl b/src/deprecated.jl index d91775cdce..6130171ba7 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1267,3 +1267,10 @@ macro tsv_str(s, flags...) 
:tsv_str) inlinetable(s, flags...; separator='\t') end + +@deprecate rename!(x::AbstractDataFrame, from::AbstractArray, to::AbstractArray) rename!(x, [f=>t for (f, t) in zip(from, to)]) +@deprecate rename!(x::AbstractDataFrame, from::Symbol, to::Symbol) rename!(x, from => to) +@deprecate rename!(x::Index, f::Function) rename!(f, x) +@deprecate rename(x::AbstractDataFrame, from::AbstractArray, to::AbstractArray) rename(x, [f=>t for (f, t) in zip(from, to)]) +@deprecate rename(x::AbstractDataFrame, from::Symbol, to::Symbol) rename(x, from => to) +@deprecate rename(x::Index, f::Function) rename(f, x) \ No newline at end of file diff --git a/src/other/index.jl b/src/other/index.jl index f0a4c0e527..6824ba66dc 100644 --- a/src/other/index.jl +++ b/src/other/index.jl @@ -44,13 +44,11 @@ function rename!(x::Index, nms) return x end -rename!(x::Index, from, to) = rename!(x, zip(from, to)) -rename!(x::Index, from::Symbol, to::Symbol) = rename!(x, ((from, to),)) -rename!(x::Index, f::Function) = rename!(x, [(x,f(x)) for x in x.names]) -rename!(f::Function, x::Index) = rename!(x, f) +rename!(x::Index, nms::Pair{Symbol,Symbol}...) = rename!(x::Index, collect(nms)) +rename!(f::Function, x::Index) = rename!(x, [(x=>f(x)) for x in x.names]) rename(x::Index, args...) = rename!(copy(x), args...) 
-rename(f::Function, x::Index) = rename(x, f) +rename(f::Function, x::Index) = rename!(f, copy(x)) Base.haskey(x::Index, key::Symbol) = haskey(x.lookup, key) Base.haskey(x::Index, key::Real) = 1 <= key <= length(x.names) diff --git a/test/dataframe.jl b/test/dataframe.jl index 58eb2a0581..5964ae38de 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -381,10 +381,27 @@ module TestDataFrame @testset "rename" begin df = DataFrame(A = 1:3, B = 'A':'C') - @test names(rename(df, :A, :A_1)) == [:A_1, :B] + @test names(rename(df, :A => :A_1)) == [:A_1, :B] @test names(df) == [:A, :B] - @test names(rename!(df, :A, :A_1)) == [:A_1, :B] + @test names(rename(df, :A => :A_1, :B => :B_1)) == [:A_1, :B_1] + @test names(df) == [:A, :B] + @test names(rename(df, [:A => :A_1, :B => :B_1])) == [:A_1, :B_1] + @test names(df) == [:A, :B] + @test names(rename(df, Dict(:A => :A_1, :B => :B_1))) == [:A_1, :B_1] + @test names(df) == [:A, :B] + @test names(rename(x->Symbol(lowercase(string(x))), df)) == [:a, :b] + @test names(df) == [:A, :B] + + @test rename!(df, :A => :A_1) === df @test names(df) == [:A_1, :B] + @test rename!(df, :A_1 => :A_2, :B => :B_2) === df + @test names(df) == [:A_2, :B_2] + @test rename!(df, [:A_2 => :A_3, :B_2 => :B_3]) === df + @test names(df) == [:A_3, :B_3] + @test rename!(df, Dict(:A_3 => :A_4, :B_3 => :B_4)) === df + @test names(df) == [:A_4, :B_4] + @test rename!(x->Symbol(lowercase(string(x))), df) === df + @test names(df) == [:a_4, :b_4] end @testset "size" begin diff --git a/test/index.jl b/test/index.jl index a670fde451..efab6d7edd 100644 --- a/test/index.jl +++ b/test/index.jl @@ -33,10 +33,10 @@ end @test_throws ArgumentError names!(i, [:a,:a]) @test names!(i, [:a,:b]) == Index([:a,:b]) @test rename(i, Dict(:a=>:A, :b=>:B)) == Index([:A,:B]) -@test rename(i, :a, :A) == Index([:A,:b]) -@test rename(i, :a, :a) == Index([:a,:b]) -@test rename(i, [:a], [:A]) == Index([:A,:b]) -@test rename(i, [:a], [:a]) == Index([:a,:b]) +@test rename(i, :a => :A) == 
Index([:A,:b]) +@test rename(i, :a => :a) == Index([:a,:b]) +@test rename(i, [:a => :A]) == Index([:A,:b]) +@test rename(i, [:a => :a]) == Index([:a,:b]) @test rename(x->Symbol(uppercase(string(x))), i) == Index([:A,:B]) @test rename(x->Symbol(lowercase(string(x))), i) == Index([:a,:b])