diff --git a/docs/src/man/getting_started.md b/docs/src/man/getting_started.md index d7356990b2..606bdad053 100644 --- a/docs/src/man/getting_started.md +++ b/docs/src/man/getting_started.md @@ -471,3 +471,70 @@ CSV.write(output, df) ``` The behavior of CSV functions can be adapted via keyword arguments. For more information, see `?CSV.read` and `?CSV.write`, or checkout the online [CSV.jl documentation](https://juliadata.github.io/CSV.jl/stable/). + +## Broadcasting + +When you broadcast a function over an `AbstractDataFrame`, it is treated as an `AbstractVector` of rows, and each row is represented as a `DataFrameRow`: + +```jldoctest dataframe +julia> df = DataFrame(A = 1:3, B = 3:-1:1) +3×2 DataFrame +│ Row │ A │ B │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 3 │ +│ 2 │ 2 │ 2 │ +│ 3 │ 3 │ 1 │ + +julia> identity.(df) +3-element Array{DataFrameRow{DataFrame},1}: + DataFrameRow (row 1) +A 1 +B 3 + DataFrameRow (row 2) +A 2 +B 2 + DataFrameRow (row 3) +A 3 +B 1 + +julia> copy.(df) +3-element Array{NamedTuple{(:A, :B),Tuple{Int64,Int64}},1}: + (A = 1, B = 3) + (A = 2, B = 2) + (A = 3, B = 1) + ``` + +In the last example, we used the `copy` function, which transforms a `DataFrameRow` into a `NamedTuple`. 
+ +A `DataFrameRow` is treated as a collection of the values stored in its columns, so you can pass it to standard functions that accept collections and also broadcast functions over it to get a vector: + +```jldoctest dataframe +julia> df = DataFrame(A = 1:3, B = 3:-1:1) +3×2 DataFrame +│ Row │ A │ B │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 3 │ +│ 2 │ 2 │ 2 │ +│ 3 │ 3 │ 1 │ + +julia> dfr = df[1, :] +DataFrameRow (row 1) +A 1 +B 3 + +julia> sum(dfr) +4 + +julia> string.(dfr) +2-element Array{String,1}: + "1" + "3" + +julia> (row -> string.(row)).(df) +3-element Array{Array{String,1},1}: + ["1", "3"] + ["2", "2"] + ["3", "1"] +``` \ No newline at end of file diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 62b4e08103..d5179bffe8 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -223,6 +223,8 @@ Base.axes(df::AbstractDataFrame, i::Integer) = axes(df)[i] Base.ndims(::AbstractDataFrame) = 2 +Base.broadcastable(adf::AbstractDataFrame) = eachrow(adf) + Base.getproperty(df::AbstractDataFrame, col_ind::Symbol) = getindex(df, col_ind) Base.setproperty!(df::AbstractDataFrame, col_ind::Symbol, x) = setindex!(df, x, col_ind) # Private fields are never exposed since they can conflict with column names diff --git a/test/broadcasting.jl b/test/broadcasting.jl new file mode 100644 index 0000000000..45bde08335 --- /dev/null +++ b/test/broadcasting.jl @@ -0,0 +1,9 @@ +module TestDataFrame + using DataFrames, Test + + @testset "broadcast DataFrame & DataFrameRow" begin + df = DataFrame(x=1:4, y=5:8, z=9:12) + @test sum.(df) == [15, 18, 21, 24] + @test ((row -> row .+ 1)).(df) == [i .+ [0, 4, 8] for i in 2:5] + end +end diff --git a/test/runtests.jl b/test/runtests.jl index a4ccd5ad99..2926fb1994 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,6 +27,7 @@ my_tests = ["utils.jl", "tables.jl", "tabletraits.jl", "indexing.jl", + "broadcasting.jl", 
"deprecated.jl"] println("Running tests:")