Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

In-place broadcast assignment #3206

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/DataFrames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ export AbstractDataFrame,
GroupedDataFrame,
SubDataFrame,
Tables,
@alias,
allcombinations,
allowmissing!,
antijoin,
Expand Down Expand Up @@ -132,6 +133,7 @@ const METADATA_FIXED =

include("other/utils.jl")
include("other/index.jl")
include("other/alias.jl")

include("abstractdataframe/abstractdataframe.jl")
include("dataframe/dataframe.jl")
Expand Down
2 changes: 1 addition & 1 deletion src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3223,7 +3223,7 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Sy
firstindex(item_new) != 1 && _onebased_check_error()

if ncol(dfp) == 0
dfp[!, name] = item_new
@alias dfp[!, name] = item_new
else
if hasproperty(dfp, name)
@assert makeunique
Expand Down
18 changes: 11 additions & 7 deletions src/abstractdataframe/selection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -682,13 +682,13 @@ function _add_col_check_copy(newdf::DataFrame, df::AbstractDataFrame,
@assert parent_cols isa Union{Int, AbstractVector{Int}}
if copycols
if !(fun isa ByRow) && (v isa SubArray || any(i -> vpar === parent(cdf[i]), parent_cols))
newdf[!, newname] = copy(v)
else
newdf[!, newname] = v
else
@alias newdf[!, newname] = v
end
else
if fun isa ByRow
newdf[!, newname] = v
@alias newdf[!, newname] = v
else
must_copy = false
for i in parent_cols
Expand All @@ -702,7 +702,11 @@ function _add_col_check_copy(newdf::DataFrame, df::AbstractDataFrame,
end
end
end
newdf[!, newname] = must_copy ? copy(v) : v
if must_copy
newdf[!, newname] = v
else
@alias newdf[!, newname] = v
end
end
end
end
Expand Down Expand Up @@ -807,7 +811,7 @@ function select_transform!((nc,)::Ref{Any}, df::AbstractDataFrame, newdf::DataFr
end
newres = DataFrame()
for n in kp1
newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res]
@alias newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res]
end
res = newres
elseif !(res isa Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix})
Expand Down Expand Up @@ -872,7 +876,7 @@ function select_transform!((nc,)::Ref{Any}, df::AbstractDataFrame, newdf::DataFr
# allow squashing a scalar to 0 rows
rows = nrow(newdf)
end
newdf[!, newname] = fill!(Tables.allocatecolumn(typeof(res_unwrap), rows),
@alias newdf[!, newname] = fill!(Tables.allocatecolumn(typeof(res_unwrap), rows),
res_unwrap)
if (col_idx isa Int || (col_idx isa AbstractVector{Int} && length(col_idx) == 1)) &&
(fun === identity || fun === copy || _names(df)[only(col_idx)] == newname)
Expand Down Expand Up @@ -1756,7 +1760,7 @@ function _manipulate(df::AbstractDataFrame, normalized_cs::Vector{Any}, copycols
end
end
# here even if keeprows is true all is OK
newdf[!, newname] = column_to_copy[i] ? df[:, i] : df[!, i]
@alias newdf[!, newname] = column_to_copy[i] ? df[:, i] : df[!, i]
column_to_copy[i] = true
allow_resizing_newdf[] = false
_copy_col_note_metadata!(newdf, newname, df, i)
Expand Down
64 changes: 47 additions & 17 deletions src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ end

function _preprocess_column(col::Any, len::Integer, copycols::Bool)
if col isa AbstractRange
return collect(col)
return copycols ? collect(col) : col
elseif col isa AbstractVector
return copycols ? copy(col) : col
elseif col isa Union{AbstractArray{<:Any, 0}, Ref}
Expand Down Expand Up @@ -632,11 +632,11 @@ Base.getindex(df::DataFrame, row_ind::typeof(!), col_inds::MultiColumnIndex) =
##############################################################################

# Will automatically add a new column if needed
function insert_single_column!(df::DataFrame, v::AbstractVector, col_ind::ColumnIndex)
if ncol(df) != 0 && nrow(df) != length(v)
throw(ArgumentError("New columns must have the same length as old columns"))
function insert_single_column!(df::DataFrame, v::Any, col_ind::ColumnIndex; copycols = true)
dv = _preprocess_column(v, nrow(df), copycols)
if ncol(df) != 0 && nrow(df) != length(dv)
throw(DimensionMismatch("Length of the column ($(length(dv))) and rows in the data frame ($(nrow(df))) are not equal"))
end
dv = isa(v, AbstractRange) ? collect(v) : v
firstindex(dv) != 1 && _onebased_check_error()

if haskey(index(df), col_ind)
Expand Down Expand Up @@ -665,23 +665,27 @@ function insert_single_entry!(df::DataFrame, v::Any, row_ind::Integer, col_ind::
end

# df[!, SingleColumnIndex] = AbstractVector
function Base.setindex!(df::DataFrame, v::AbstractVector, ::typeof(!), col_ind::ColumnIndex)
function Base.setindex!(df::DataFrame, v::AbstractVector, ::typeof(!), col_ind::ColumnIndex; copycols = true)
insert_single_column!(df, v, col_ind; copycols)
return df
end

# df[!, SingleColumnIndex] = value
function Base.setindex!(df::DataFrame, v::Any, ::typeof(!), col_ind::ColumnIndex)
insert_single_column!(df, v, col_ind)
return df
end

# df.col = AbstractVector
# df.col = value
# separate methods are needed due to dispatch ambiguity
Base.setproperty!(df::DataFrame, col_ind::Symbol, v::AbstractVector) =
Base.setproperty!(df::DataFrame, col_ind::Symbol, v::AbstractVector; copycols = true) =
setindex!(df, v, !, col_ind; copycols)
Base.setproperty!(df::DataFrame, col_ind::AbstractString, v::AbstractVector; copycols = true) =
setindex!(df, v, !, col_ind; copycols)
Base.setproperty!(df::DataFrame, col_ind::Symbol, v::Any) =
(df[!, col_ind] = v)
Base.setproperty!(df::DataFrame, col_ind::AbstractString, v::AbstractVector) =
Base.setproperty!(df::DataFrame, col_ind::AbstractString, v::Any) =
(df[!, col_ind] = v)
Base.setproperty!(::DataFrame, col_ind::Symbol, v::Any) =
throw(ArgumentError("It is only allowed to pass a vector as a column of a DataFrame. " *
"Instead use `df[!, col_ind] .= v` if you want to use broadcasting."))
Base.setproperty!(::DataFrame, col_ind::AbstractString, v::Any) =
throw(ArgumentError("It is only allowed to pass a vector as a column of a DataFrame. " *
"Instead use `df[!, col_ind] .= v` if you want to use broadcasting."))

# df[SingleRowIndex, SingleColumnIndex] = Single Item
function Base.setindex!(df::DataFrame, v::Any, row_ind::Integer, col_ind::ColumnIndex)
Expand Down Expand Up @@ -718,7 +722,7 @@ for T in (:AbstractVector, :Not, :Colon)
row_inds::$T,
col_ind::ColumnIndex)
if row_inds isa Colon && !haskey(index(df), col_ind)
df[!, col_ind] = copy(v)
df[!, col_ind] = v
return df
end
x = df[!, col_ind]
Expand Down Expand Up @@ -786,6 +790,28 @@ for T1 in (:AbstractVector, :Not, :Colon, :(typeof(!))),
end
end

for T1 in (:(typeof(!)),),
T2 in MULTICOLUMNINDEX_TUPLE
@eval function Base.setindex!(df::DataFrame,
v::AbstractVector,
row_inds::$T1,
col_inds::$T2)
throw(ArgumentError("a vector can not be assigned to multiple rows and columns, consider reshaping to a matrix first"))
end

@eval function Base.setindex!(df::DataFrame,
v::Any,
row_inds::$T1,
col_inds::$T2)
idxs = index(df)[col_inds]
for col in idxs
# this will drop metadata appropriately
df[row_inds, col] = v
end
return df
end
end

"""
copy(df::DataFrame; copycols::Bool=true)

Expand Down Expand Up @@ -1191,7 +1217,11 @@ function hcat!(df1::DataFrame, df2::AbstractDataFrame;
_drop_all_nonnote_metadata!(df1)
_keep_matching_table_note_metadata!(df1, df2)
for i in 1:length(u)
df1[!, u[i]] = copycols ? df2[:, i] : df2[!, i]
if copycols
df1[!, u[i]] = df2[!, i]
else
@alias df1[!, u[i]] = df2[!, i]
end
_copy_col_note_metadata!(df1, u[i], df2, i)
end

Expand Down
31 changes: 31 additions & 0 deletions src/other/alias.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
@alias df.col = v
@alias df[!, :col] = v

df::DataFrame
v::AbstractVector

Assigns v to the column `col` without copying. Such that `df.col === v === df]!, :col]```.
Any AbstractVector is permissable, this may limit what operations are possible to do on `df` afterwards.
For instance after `@alias df.col = 1:3` it won't be possible to change the number of rows because UnitRange does not support resizing.
"""
macro alias(ex)
if !Meta.isexpr(ex, :(=))
throw(ArgumentError("Invalid use of @alias macro: argument must be a column assignment `df.col = v` or `df[!, col] = v`"))
end

lhs, rhs = ex.args

if Meta.isexpr(lhs, :ref)
ex = :(setindex!($(lhs.args[1]), $rhs, $(lhs.args[2:end]...); copycols = false); $rhs)

elseif Meta.isexpr(lhs, :.)
ex = :(setproperty!($(lhs.args...), $rhs; copycols = false); $rhs)

else
throw(ArgumentError("Invalid use of @alias macro: argument must be a column assignment `df.col = v` or `df[!, col] = v`"))

end

esc(ex)
end
29 changes: 11 additions & 18 deletions src/other/broadcasting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ function Base.maybeview(df::AbstractDataFrame, rows, cols)
return view(df, rows, cols)
end

# df[:, cols] .= ...
function Base.dotview(df::AbstractDataFrame, ::Colon, cols::ColumnIndex)
if haskey(index(df), cols)
_drop_all_nonnote_metadata!(parent(df))
Expand All @@ -168,28 +169,20 @@ function Base.dotview(df::AbstractDataFrame, ::Colon, cols::ColumnIndex)
return LazyNewColDataFrame(df, Symbol(cols))
end

function Base.dotview(df::AbstractDataFrame, ::typeof(!), cols)
if !(cols isa ColumnIndex)
return ColReplaceDataFrame(df, convert(Vector{Int}, index(df)[cols]))
end
if cols isa SymbolOrString
if columnindex(df, cols) == 0 && !is_column_insertion_allowed(df)
throw(ArgumentError("creating new columns in a SubDataFrame that subsets " *
"columns of its parent data frame is disallowed"))
end
elseif !(1 <= cols <= ncol(df))
throw(ArgumentError("creating new columns using an integer index is disallowed"))
end
return LazyNewColDataFrame(df, cols isa AbstractString ? Symbol(cols) : cols)
# df[!, cols] .= ...
function Base.dotview(df::AbstractDataFrame, ::typeof(!), cols::Any)
return ColReplaceDataFrame(df, convert(Vector{Int}, index(df)[cols]))
end
function Base.dotview(df::AbstractDataFrame, ::typeof(!), cols::ColumnIndex)
_drop_all_nonnote_metadata!(parent(df))
return df[!, cols]
end

if isdefined(Base, :dotgetproperty) # Introduced in Julia 1.7
# df.col .= ...
function Base.dotgetproperty(df::AbstractDataFrame, col::SymbolOrString)
if columnindex(df, col) == 0 && !is_column_insertion_allowed(df)
throw(ArgumentError("creating new columns in a SubDataFrame that subsets " *
"columns of its parent data frame is disallowed"))
end
return LazyNewColDataFrame(df, Symbol(col))
_drop_all_nonnote_metadata!(parent(df))
return df[!, col]
end
end

Expand Down
19 changes: 9 additions & 10 deletions src/subdataframe/subdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,7 @@ Base.@propagate_inbounds function Base.setindex!(sdf::SubDataFrame, val::Any, ::
"columns of its parent data frame is disallowed"))
end
if !(val isa AbstractVector && nrow(sdf) == length(val))
throw(ArgumentError("Assigned value must be a vector with length " *
"equal to number of rows in the SubDataFrame"))
throw(DimensionMismatch("Length of the assigned column ($(length(val))) and rows in the SubDataFrame ($(nrow(sdf))) are not equal"))
end
T = eltype(val)
newcol = similar(val, Union{T, Missing}, nrow(parent(sdf)))
Expand All @@ -209,7 +208,7 @@ end
# and then define methods for them)
# consider merging SubDataFrame and DataFrame setindex! methods

function Base.setindex!(sdf::SubDataFrame, v::AbstractVector,
function Base.setindex!(sdf::SubDataFrame, v::Any,
::typeof(!), col_ind::ColumnIndex)
if col_ind isa Union{Signed, Unsigned} && !(1 <= col_ind <= ncol(sdf))
throw(ArgumentError("Cannot assign to non-existent column: $col_ind"))
Expand All @@ -219,15 +218,17 @@ function Base.setindex!(sdf::SubDataFrame, v::AbstractVector,
throw(ArgumentError("creating new columns in a SubDataFrame that subsets " *
"columns of its parent data frame is disallowed"))
end
v = _preprocess_column(v, nrow(sdf), false)
sdf[:, col_ind] = v
else
pdf = parent(sdf)
p_col_ind = parentcols(index(sdf), col_ind)
old_col = pdf[!, p_col_ind]
T = eltype(old_col)
S = eltype(v)
S = v isa AbstractVector ? eltype(v) : typeof(v)
newcol = similar(old_col, promote_type(T, S), length(old_col))
newcol .= old_col
v = _preprocess_column(v, nrow(sdf), false)
newcol[rows(sdf)] = v
pdf[!, p_col_ind] = newcol
end
Expand Down Expand Up @@ -278,12 +279,10 @@ Base.setproperty!(df::SubDataFrame, col_ind::Symbol, v::AbstractVector) =
(df[!, col_ind] = v)
Base.setproperty!(df::SubDataFrame, col_ind::AbstractString, v::AbstractVector) =
(df[!, col_ind] = v)
Base.setproperty!(::SubDataFrame, col_ind::Symbol, v::Any) =
throw(ArgumentError("It is only allowed to pass a vector as a column of a SubDataFrame. " *
"Instead use `df[!, col_ind] .= v` if you want to use broadcasting."))
Base.setproperty!(::SubDataFrame, col_ind::AbstractString, v::Any) =
throw(ArgumentError("It is only allowed to pass a vector as a column of a SubDataFrame. " *
"Instead use `df[!, col_ind] .= v` if you want to use broadcasting."))
Base.setproperty!(df::SubDataFrame, col_ind::Symbol, v::Any) =
(df[!, col_ind] = v)
Base.setproperty!(df::SubDataFrame, col_ind::AbstractString, v::Any) =
(df[!, col_ind] = v)

##############################################################################
##
Expand Down
68 changes: 68 additions & 0 deletions test/alias.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
module TestAliasAssignment

using Test, DataFrames
const ≅ = isequal

@testset "Aliasing asignment" begin
@testset "$init" for init in [[], Matrix([4 5 6]'), [4 5;6 7;8 9]]
dfr = DataFrame(init, :auto)
@testset "$v" for v in [
[1,2,3],
1:3,
]
@testset "df.x2 = v" begin
df = copy(dfr)
a = @alias df.x2 = v
@test a === v
@test df.x2 === df[!, :x2] === v
df.x2 = v
@test df.x2 !== v
@test df.x2 ≅ v
a = @alias df.x2 = v
@test a === v
@test df.x2 === df[!, :x2] === v
end

@testset "df[!, :x2] = v" begin
df = copy(dfr)
a = @alias df[!, :x2] = v
@test a === v
@test df.x2 === df[!, :x2] === v
df[!, :x2] = v
@test df[!, :x2] !== v
@test df[!, :x2] ≅ v
a = @alias df[!, :x2] = v
@test a === v
@test df.x2 === df[!, :x2] === v
end
end

@testset "Invalid use of alias macro" begin
ex = try
eval(:(@alias 1))
catch ex
ex
end
@test ex.error isa ArgumentError

df = copy(dfr)
ex = try
eval(:(@alias df.x .= 1))
catch ex
ex
end
@test ex.error isa ArgumentError

struct S; x; end
s = S(1)
@test_throws MethodError @alias s.x = 1
@test_throws MethodError @alias s.x2 = 1

@test_throws MethodError @alias df.x2 = 1
@test_throws MethodError @alias df[!, :x2] = 1
@test_throws MethodError @alias df[:, :x2] = 1
end
end
end

end # module
Loading