JuliaData · nalimilan · May 31, 2018 · Apr 27, 2018 · nalimilan · Apr 28, 2018
diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -236,6 +236,13 @@ Compat.axes(df, i) = axes(df)[i]
 
 Base.ndims(::AbstractDataFrame) = 2
 
+if VERSION >= v"0.7.0-DEV.3067"
+    Base.getproperty(df::AbstractDataFrame, col_ind::Symbol) = getindex(df, col_ind)
+    Base.setproperty!(df::AbstractDataFrame, col_ind::Symbol, x) = setindex!(df, x, col_ind)
+    # Private fields are never exposed since they can conflict with column names
+    Base.propertynames(df::AbstractDataFrame, private::Bool=false) = names(df)
+end
+
 ##############################################################################
 ##
 ## Similar

diff --git a/src/abstractdataframe/io.jl b/src/abstractdataframe/io.jl
@@ -218,13 +218,11 @@ struct DataFrameStream{T}
     columns::T
     header::Vector{String}
 end
-DataFrameStream(df::DataFrame) = DataFrameStream(Tuple(df.columns), string.(names(df)))
+DataFrameStream(df::DataFrame) = DataFrameStream(Tuple(columns(df)), string.(names(df)))
 
 # DataFrame Data.Source implementation
-function Data.schema(df::DataFrame)
-    return Data.Schema(Type[eltype(A) for A in df.columns],
-                       string.(names(df)), length(df) == 0 ? 0 : length(df.columns[1]))
-end
+Data.schema(df::DataFrame) =
+    Data.Schema(Type[eltype(A) for A in columns(df)], string.(names(df)), size(df, 1))
 
 Data.isdone(source::DataFrame, row, col, rows, cols) = row > rows || col > cols
 function Data.isdone(source::DataFrame, row, col)
@@ -283,7 +281,7 @@ function DataFrame(sch::Data.Schema{R}, ::Type{S}=Data.Field,
                 # to the # of rows in the source
             newsize = ifelse(S == Data.Column || !R, 0,
                         ifelse(append, sinkrows + sch.rows, sch.rows))
-            foreach(col->resize!(col, newsize), sink.columns)
+            foreach(col->resize!(col, newsize), columns(sink))
             sch.rows = newsize
         end
         # take care of a possible reference from source by adding to WeakRefStringArrays
@@ -322,7 +320,7 @@ DataFrame(sink, sch::Data.Schema, ::Type{S}, append::Bool;
                        row, col::Int, knownrows)
     append!(sink.columns[col], column)
 end
-    
+
 Data.close!(df::DataFrameStream) =
     DataFrame(collect(Any, df.columns), Symbol.(df.header), makeunique=true)
 
diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl
@@ -221,11 +221,11 @@ end
 ##
 ##############################################################################
 
-index(df::DataFrame) = df.colindex
-columns(df::DataFrame) = df.columns
+index(df::DataFrame) = getfield(df, :colindex)
+columns(df::DataFrame) = getfield(df, :columns)
 
 # TODO: Remove these
-nrow(df::DataFrame) = ncol(df) > 0 ? length(df.columns[1])::Int : 0
+nrow(df::DataFrame) = ncol(df) > 0 ? length(columns(df)[1])::Int : 0
 ncol(df::DataFrame) = length(index(df))
 
 ##############################################################################
@@ -247,7 +247,7 @@ ncol(df::DataFrame) = length(index(df))
 #
 # Let getindex(index(df), col_inds) from Index() handle the resolution
 #  of column indices
-# Let getindex(df.columns[j], row_inds) from AbstractVector() handle
+# Let getindex(columns(df)[j], row_inds) from AbstractVector() handle
 #  the resolution of row indices
 
 # TODO: change Real to Integer in this union after deprecation period
@@ -256,13 +256,13 @@ const ColumnIndex = Union{Real, Symbol}
 # df[SingleColumnIndex] => AbstractDataVector
 function Base.getindex(df::DataFrame, col_ind::ColumnIndex)
     selected_column = index(df)[col_ind]
-    return df.columns[selected_column]
+    return columns(df)[selected_column]
 end
 
 # df[MultiColumnIndex] => DataFrame
 function Base.getindex(df::DataFrame, col_inds::AbstractVector)
     selected_columns = index(df)[col_inds]
-    new_columns = df.columns[selected_columns]
+    new_columns = columns(df)[selected_columns]
     return DataFrame(new_columns, Index(_names(df)[selected_columns]))
 end
 
@@ -272,26 +272,26 @@ Base.getindex(df::DataFrame, col_inds::Colon) = copy(df)
 # df[SingleRowIndex, SingleColumnIndex] => Scalar
 function Base.getindex(df::DataFrame, row_ind::Real, col_ind::ColumnIndex)
     selected_column = index(df)[col_ind]
-    return df.columns[selected_column][row_ind]
+    return columns(df)[selected_column][row_ind]
 end
 
 # df[SingleRowIndex, MultiColumnIndex] => DataFrame
 function Base.getindex(df::DataFrame, row_ind::Real, col_inds::AbstractVector)
     selected_columns = index(df)[col_inds]
-    new_columns = Any[dv[[row_ind]] for dv in df.columns[selected_columns]]
+    new_columns = Any[dv[[row_ind]] for dv in columns(df)[selected_columns]]
     return DataFrame(new_columns, Index(_names(df)[selected_columns]))
 end
 
 # df[MultiRowIndex, SingleColumnIndex] => AbstractVector
 function Base.getindex(df::DataFrame, row_inds::AbstractVector, col_ind::ColumnIndex)
     selected_column = index(df)[col_ind]
-    return df.columns[selected_column][row_inds]
+    return columns(df)[selected_column][row_inds]
 end
 
 # df[MultiRowIndex, MultiColumnIndex] => DataFrame
 function Base.getindex(df::DataFrame, row_inds::AbstractVector, col_inds::AbstractVector)
     selected_columns = index(df)[col_inds]
-    new_columns = Any[dv[row_inds] for dv in df.columns[selected_columns]]
+    new_columns = Any[dv[row_inds] for dv in columns(df)[selected_columns]]
     return DataFrame(new_columns, Index(_names(df)[selected_columns]))
 end
 
@@ -304,7 +304,7 @@ Base.getindex(df::DataFrame, row_ind::Real, col_inds::Colon) = df[[row_ind], col
 
 # df[MultiRowIndex, :] => DataFrame
 function Base.getindex(df::DataFrame, row_inds::AbstractVector, col_inds::Colon)
-    new_columns = Any[dv[row_inds] for dv in df.columns]
+    new_columns = Any[dv[row_inds] for dv in columns(df)]
     return DataFrame(new_columns, copy(index(df)))
 end
 
@@ -339,15 +339,15 @@ function insert_single_column!(df::DataFrame,
     dv = isa(v, AbstractRange) ? collect(v) : v
     if haskey(index(df), col_ind)
         j = index(df)[col_ind]
-        df.columns[j] = dv
+        columns(df)[j] = dv
     else
         if typeof(col_ind) <: Symbol
             push!(index(df), col_ind)
-            push!(df.columns, dv)
+            push!(columns(df), dv)
         else
             if ncol(df) + 1 == Int(col_ind)
                 push!(index(df), nextcolname(df))
-                push!(df.columns, dv)
+                push!(columns(df), dv)
             else
                 throw(ArgumentError("Cannot assign to non-existent column: $col_ind"))
             end
@@ -358,7 +358,7 @@ end
 
 function insert_single_entry!(df::DataFrame, v::Any, row_ind::Real, col_ind::ColumnIndex)
     if haskey(index(df), col_ind)
-        df.columns[index(df)[col_ind]][row_ind] = v
+        columns(df)[index(df)[col_ind]][row_ind] = v
         return v
     else
         error("Cannot assign to non-existent column: $col_ind")
@@ -370,7 +370,7 @@ function insert_multiple_entries!(df::DataFrame,
                                   row_inds::AbstractVector{<:Real},
                                   col_ind::ColumnIndex)
     if haskey(index(df), col_ind)
-        df.columns[index(df)[col_ind]][row_inds] = v
+        columns(df)[index(df)[col_ind]][row_inds] = v
         return v
     else
         error("Cannot assign to non-existent column: $col_ind")
@@ -604,8 +604,8 @@ function Base.setindex!(df::DataFrame,
                         new_df::DataFrame,
                         row_inds::Colon,
                         col_inds::Colon=Colon())
-    df.columns = copy(new_df.columns)
-    df.colindex = copy(new_df.colindex)
+    setfield!(df, :columns, copy(columns(new_df)))
+    setfield!(df, :colindex, copy(index(new_df)))
     df
 end
 
@@ -630,7 +630,7 @@ Base.setindex!(df::DataFrame, x::Nothing, col_ind::Int) = delete!(df, col_ind)
 ##
 ##############################################################################
 
-Base.empty!(df::DataFrame) = (empty!(df.columns); empty!(index(df)); df)
+Base.empty!(df::DataFrame) = (empty!(columns(df)); empty!(index(df)); df)
 
 """
 Insert a column into a data frame in place.
@@ -708,7 +708,7 @@ function Base.insert!(df::DataFrame, col_ind::Int, item::AbstractVector, name::S
         end
     end
     insert!(index(df), col_ind, name)
-    insert!(df.columns, col_ind, item)
+    insert!(columns(df), col_ind, item)
     df
 end
 
@@ -784,7 +784,7 @@ end
 function Base.delete!(df::DataFrame, inds::Vector{Int})
     for ind in sort(inds, rev = true)
         if 1 <= ind <= ncol(df)
-            splice!(df.columns, ind)
+            splice!(columns(df), ind)
             delete!(index(df), ind)
         else
             throw(ArgumentError("Can't delete a non-existent DataFrame column"))
@@ -798,7 +798,7 @@ Base.delete!(df::DataFrame, c::Any) = delete!(df, index(df)[c])
 # deleterows!()
 function deleterows!(df::DataFrame, ind::Union{Integer, UnitRange{Int}})
     for i in 1:ncol(df)
-        df.columns[i] = deleteat!(df.columns[i], ind)
+        columns(df)[i] = deleteat!(columns(df)[i], ind)
     end
     df
 end
@@ -824,7 +824,7 @@ function deleterows!(df::DataFrame, ind::AbstractVector{Int})
     keep[ikeep:end] = idf:n
 
     for i in 1:ncol(df)
-        df.columns[i] = df.columns[i][keep]
+        columns(df)[i] = columns(df)[i][keep]
     end
     df
 end
@@ -1017,18 +1017,18 @@ end
 
 # array- and tuple-like collections
 function Base.push!(df::DataFrame, iterable::Any)
-    if length(iterable) != length(df.columns)
+    if length(iterable) != size(df, 2)
         msg = "Length of iterable does not match DataFrame column count."
         throw(ArgumentError(msg))
     end
     i = 1
     for t in iterable
         try
-            push!(df.columns[i], t)
+            push!(columns(df)[i], t)
         catch
             #clean up partial row
             for j in 1:(i - 1)
-                pop!(df.columns[j])
+                pop!(columns(df)[j])
             end
             msg = "Error adding $t to column :$(_names(df)[i]). Possible type mis-match."
             throw(ArgumentError(msg))
@@ -1094,9 +1094,9 @@ function permutecols!(df::DataFrame, p::AbstractVector)
         throw(ArgumentError("$p is not a valid column permutation for this DataFrame"))
     end
     permute!(columns(df), p)
-    df.colindex = Index(names(df)[p])
+    setfield!(df, :colindex, Index(names(df)[p]))
 end
 
 function permutecols!(df::DataFrame, p::AbstractVector{Symbol})
-    permutecols!(df, getindex.(df.colindex.lookup, p))
+    permutecols!(df, getindex.(index(df).lookup, p))
 end
diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl
@@ -4,28 +4,44 @@ struct DataFrameRow{T <: AbstractDataFrame}
     row::Int
 end
 
+
+"""
+    parent(r::DataFrameRow)
+
+Return the parent data frame of `r`.
+"""
+Base.parent(r::DataFrameRow) = getfield(r, :df)
+row(r::DataFrameRow) = getfield(r, :row)
+
 function Base.getindex(r::DataFrameRow, idx::AbstractArray)
-    return DataFrameRow(r.df[idx], r.row)
+    return DataFrameRow(parent(r)[idx], row(r))
 end
 
 function Base.getindex(r::DataFrameRow, idx::Any)
-    return r.df[r.row, idx]
+    return parent(r)[row(r), idx]
 end
 
 function Base.setindex!(r::DataFrameRow, value::Any, idx::Any)
-    return setindex!(r.df, value, r.row, idx)
+    return setindex!(parent(r), value, row(r), idx)
 end
 
-Base.names(r::DataFrameRow) = names(r.df)
-_names(r::DataFrameRow) = _names(r.df)
+Base.names(r::DataFrameRow) = names(parent(r))
+_names(r::DataFrameRow) = _names(parent(r))
+
+if VERSION >= v"0.7.0-DEV.3067"
+    Base.getproperty(r::DataFrameRow, idx::Symbol) = getindex(r, idx)
+    Base.setproperty!(r::DataFrameRow, idx::Symbol, x::Any) = setindex!(r, x, idx)
+    # Private fields are never exposed since they can conflict with column names
+    Base.propertynames(r::DataFrameRow, private::Bool=false) = names(r)
+end
 
-Base.view(r::DataFrameRow, c) = DataFrameRow(r.df[[c]], r.row)
+Base.view(r::DataFrameRow, c) = DataFrameRow(parent(r)[[c]], row(r))
 
-index(r::DataFrameRow) = index(r.df)
+index(r::DataFrameRow) = index(parent(r))
 
-Base.length(r::DataFrameRow) = size(r.df, 2)
+Base.length(r::DataFrameRow) = size(parent(r), 2)
 
-Compat.lastindex(r::DataFrameRow) = size(r.df, 2)
+Compat.lastindex(r::DataFrameRow) = size(parent(r), 2)
 
 Base.collect(r::DataFrameRow) = Tuple{Symbol, Any}[x for x in r]
 
@@ -35,7 +51,7 @@ Base.next(r::DataFrameRow, s) = ((_names(r)[s], r[s]), s + 1)
 
 Base.done(r::DataFrameRow, s) = s > length(r)
 
-Base.convert(::Type{Array}, r::DataFrameRow) = convert(Array, r.df[r.row,:])
+Base.convert(::Type{Array}, r::DataFrameRow) = convert(Array, parent(r)[row(r),:])
 
 # hash column element
 Base.@propagate_inbounds hash_colel(v::AbstractArray, i, h::UInt = zero(UInt)) = hash(v[i], h)
@@ -57,7 +73,7 @@ function rowhash(cols::Tuple{Vararg{AbstractVector}}, r::Int, h::UInt = zero(UIn
 end
 
 Base.hash(r::DataFrameRow, h::UInt = zero(UInt)) =
-    rowhash(ntuple(i -> r.df[i], ncol(r.df)), r.row, h)
+    rowhash(ntuple(i -> parent(r)[i], ncol(parent(r))), row(r), h)
 
 # comparison of DataFrame rows
 # only the rows of the same DataFrame could be compared
@@ -66,7 +82,7 @@ Base.hash(r::DataFrameRow, h::UInt = zero(UInt)) =
 Base.:(==)(r1::DataFrameRow, r2::DataFrameRow) = isequal(r1, r2)
 
 function Base.isequal(r1::DataFrameRow, r2::DataFrameRow)
-    isequal_row(r1.df, r1.row, r2.df, r2.row)
+    isequal_row(parent(r1), row(r1), parent(r2), row(r2))
 end
 
 # internal method for comparing the elements of the same data table column
@@ -102,11 +118,11 @@ end
 
 # lexicographic ordering on DataFrame rows, missing > !missing
 function Base.isless(r1::DataFrameRow, r2::DataFrameRow)
-    (ncol(r1.df) == ncol(r2.df)) ||
+    (ncol(parent(r1)) == ncol(parent(r2))) ||
         throw(ArgumentError("Rows of the data tables that have different number of columns cannot be compared ($(ncol(df1)) and $(ncol(df2)))"))
-    @inbounds for i in 1:ncol(r1.df)
-        if !isequal(r1.df[i][r1.row], r2.df[i][r2.row])
-            return isless(r1.df[i][r1.row], r2.df[i][r2.row])
+    @inbounds for i in 1:ncol(parent(r1))
+        if !isequal(parent(r1)[i][row(r1)], parent(r2)[i][row(r2)])
+            return isless(parent(r1)[i][row(r1)], parent(r2)[i][row(r2)])
         end
     end
     return false

diff --git a/src/dataframerow/show.jl b/src/dataframerow/show.jl
@@ -17,7 +17,7 @@
 #' end
 function Base.show(io::IO, r::DataFrameRow)
     labelwidth = mapreduce(n -> length(string(n)), max, _names(r)) + 2
-    @printf(io, "DataFrameRow (row %d)\n", r.row)
+    @printf(io, "DataFrameRow (row %d)\n", row(r))
     for (label, value) in r
         println(io, rpad(label, labelwidth, ' '), value)
     end

diff --git a/src/dataframerow/utils.jl b/src/dataframerow/utils.jl
@@ -212,8 +212,8 @@ function findrows(gd::RowGroupDict,
 end
 
 function Base.getindex(gd::RowGroupDict, dfr::DataFrameRow)
-    g_row = findrow(gd, dfr.df, ntuple(i -> gd.df[i], ncol(gd.df)),
-                    ntuple(i -> dfr.df[i], ncol(dfr.df)), dfr.row)
+    g_row = findrow(gd, parent(dfr), ntuple(i -> gd.df[i], ncol(gd.df)),
+                    ntuple(i -> parent(dfr)[i], ncol(parent(dfr))), row(dfr))
     (g_row == 0) && throw(KeyError(dfr))
     gix = gd.groups[g_row]
     return view(gd.rperm, gd.starts[gix]:gd.stops[gix])