Commit d8e61c7
Merge ba396e6 into 3a6a7b3
quinnj committed Sep 11, 2017
2 parents 3a6a7b3 + ba396e6 commit d8e61c7
Showing 16 changed files with 138 additions and 134 deletions.
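
The substance of the merge is one mechanical substitution: internal calls to `nrow(df)` and `ncol(df)` become `size(df, 1)` and `size(df, 2)`. A minimal sketch of the equivalence the diff relies on (any small DataFrame works; the values shown are illustrative):

```julia
using DataFrames

df = DataFrame(i = 1:10, x = rand(10))

size(df, 1)  # number of rows, 10 -- what the old code spelled nrow(df)
size(df, 2)  # number of columns, 2 -- what the old code spelled ncol(df)
size(df)     # (10, 2)
```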
65 changes: 9 additions & 56 deletions src/abstractdataframe/abstractdataframe.jl
@@ -68,8 +68,8 @@ abstract type AbstractDataFrame end
 ##############################################################################

 # index(df) => AbstractIndex
-# nrow(df) => Int
-# ncol(df) => Int
+# size(df, 1) => Int
+# size(df, 2) => Int
 # getindex(...)
 # setindex!(...) exclusive of methods that add new columns

@@ -203,19 +203,7 @@ eltypes(df)
 """
 eltypes(df::AbstractDataFrame) = map!(eltype, Vector{Type}(size(df,2)), columns(df))

-Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df))
-function Base.size(df::AbstractDataFrame, i::Integer)
-    if i == 1
-        nrow(df)
-    elseif i == 2
-        ncol(df)
-    else
-        throw(ArgumentError("DataFrames only have two dimensions"))
-    end
-end
-
-Base.length(df::AbstractDataFrame) = ncol(df)
-Base.endof(df::AbstractDataFrame) = ncol(df)
+Base.endof(df::AbstractDataFrame) = size(df, 2)

 Base.ndims(::AbstractDataFrame) = 2

@@ -254,17 +242,17 @@ end

 Base.haskey(df::AbstractDataFrame, key::Any) = haskey(index(df), key)
 Base.get(df::AbstractDataFrame, key::Any, default::Any) = haskey(df, key) ? df[key] : default
-Base.isempty(df::AbstractDataFrame) = ncol(df) == 0
+Base.isempty(df::AbstractDataFrame) = size(df, 2) == 0

 ##############################################################################
 ##
 ## Description
 ##
 ##############################################################################

-head(df::AbstractDataFrame, r::Int) = df[1:min(r,nrow(df)), :]
+head(df::AbstractDataFrame, r::Int) = df[1:min(r,size(df, 1)), :]
 head(df::AbstractDataFrame) = head(df, 6)
-tail(df::AbstractDataFrame, r::Int) = df[max(1,nrow(df)-r+1):nrow(df), :]
+tail(df::AbstractDataFrame, r::Int) = df[max(1,size(df, 1)-r+1):size(df, 1), :]
 tail(df::AbstractDataFrame) = tail(df, 6)

 """
@@ -323,7 +311,7 @@ dump(df)
 """
 function Base.dump(io::IO, df::AbstractDataFrame, n::Int, indent)
-    println(io, typeof(df), " $(nrow(df)) observations of $(ncol(df)) variables")
+    println(io, typeof(df), " $(size(df, 1)) observations of $(size(df, 2)) variables")
     if n > 0
         for (name, col) in eachcol(df)
             print(io, indent, " ", name, ": ")
@@ -568,7 +556,7 @@ function nonunique(df::AbstractDataFrame)
     gslots = row_group_slots(df)[3]
     # unique rows are the first encountered group representatives,
     # nonunique are everything else
-    res = fill(true, nrow(df))
+    res = fill(true, size(df, 1))
     @inbounds for g_row in gslots
         (g_row > 0) && (res[g_row] = false)
     end
@@ -637,7 +625,7 @@ function colmissing(df::AbstractDataFrame) # -> Vector{Int}
 end

 function without(df::AbstractDataFrame, icols::Vector{Int})
-    newcols = setdiff(1:ncol(df), icols)
+    newcols = setdiff(1:size(df, 2), icols)
     df[newcols]
 end
 without(df::AbstractDataFrame, i::Int) = without(df, [i])
@@ -767,38 +755,3 @@ function Base.hash(df::AbstractDataFrame)
     end
     return UInt(h)
 end
-
-
-## Documentation for methods defined elsewhere
-
-"""
-Number of rows or columns in an AbstractDataFrame
-
-```julia
-nrow(df::AbstractDataFrame)
-ncol(df::AbstractDataFrame)
-```
-
-**Arguments**
-
-* `df` : the AbstractDataFrame
-
-**Result**
-
-* `::AbstractDataFrame` : the updated version
-
-See also [`size`](@ref).
-
-NOTE: these functions may be depreciated for `size`.
-
-**Examples**
-
-```julia
-df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
-size(df)
-nrow(df)
-ncol(df)
-```
-
-"""
-# nrow, ncol
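
The rewritten `head`/`tail` bodies above reduce to plain range indexing with the row count clamped via `size(df, 1)`. A standalone sketch of the same clamping logic, outside the methods:

```julia
using DataFrames

df = DataFrame(x = 1:10)
r = 6

# Same logic as the rewritten head/tail: clamp the requested row
# count to the actual number of rows before slicing.
head_rows = df[1:min(r, size(df, 1)), :]
tail_rows = df[max(1, size(df, 1) - r + 1):size(df, 1), :]
```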
6 changes: 3 additions & 3 deletions src/abstractdataframe/io.jl
@@ -208,13 +208,13 @@ DataFrameStream(df::DataFrame) = DataFrameStream(Tuple(df.columns), string.(name
 # DataFrame Data.Source implementation
 function Data.schema(df::DataFrame)
     return Data.Schema(Type[eltype(A) for A in df.columns],
-                       string.(names(df)), length(df) == 0 ? 0 : length(df.columns[1]))
+                       string.(names(df)), size(df, 1))
 end

 Data.isdone(source::DataFrame, row, col, rows, cols) = row > rows || col > cols
 function Data.isdone(source::DataFrame, row, col)
-    cols = length(source)
-    return Data.isdone(source, row, col, cols == 0 ? 0 : length(df.columns[1]), cols)
+    rows, cols = size(source)
+    return Data.isdone(source, row, col, rows, cols)
 end

 Data.streamtype(::Type{DataFrame}, ::Type{Data.Column}) = true
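
The old `Data.schema` guarded the zero-column case by hand (`length(df) == 0 ? 0 : length(df.columns[1])`); `size(df, 1)` already returns 0 there, which is what makes the one-liner safe. A quick check of that assumption:

```julia
using DataFrames

# size reports zero rows for a column-less DataFrame, so no explicit
# guard against indexing into df.columns is needed.
size(DataFrame(), 1)         # 0
size(DataFrame(a = 1:3), 1)  # 3
```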
32 changes: 16 additions & 16 deletions src/abstractdataframe/join.jl
@@ -70,8 +70,8 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol,

     nrow = length(all_orig_left_ixs) + roil
     @assert nrow == length(all_orig_right_ixs) + loil
-    ncleft = ncol(joiner.dfl)
-    cols = Vector{Any}(ncleft + ncol(dfr_noon))
+    ncleft = size(joiner.dfl, 2)
+    cols = Vector{Any}(ncleft + size(dfr_noon, 2))
     _similar = kind == :inner ? similar : similar_nullable
     for (i, col) in enumerate(columns(joiner.dfl))
         cols[i] = _similar(col, nrow)
@@ -132,10 +132,10 @@ function update_row_maps!(left_table::AbstractDataFrame,
     @inline update!(mask::Vector{Bool}, orig_ixs::AbstractArray) = (mask[orig_ixs] = false)

     # iterate over left rows and compose the left<->right index map
-    right_dict_cols = ntuple(i -> right_dict.df[i], ncol(right_dict.df))
-    left_table_cols = ntuple(i -> left_table[i], ncol(left_table))
+    right_dict_cols = ntuple(i -> right_dict.df[i], size(right_dict.df, 2))
+    left_table_cols = ntuple(i -> left_table[i], size(left_table, 2))
     next_join_ix = 1
-    for l_ix in 1:nrow(left_table)
+    for l_ix in 1:size(left_table, 1)
         r_ixs = findrows(right_dict, left_table, right_dict_cols, left_table_cols, l_ix)
         if isempty(r_ixs)
             update!(leftonly_ixs, l_ix, next_join_ix)
@@ -164,16 +164,16 @@
                          map_left::Bool, map_leftonly::Bool,
                          map_right::Bool, map_rightonly::Bool)
     init_map(df::AbstractDataFrame, init::Bool) = init ?
-        RowIndexMap(sizehint!(Vector{Int}(), nrow(df)),
-                    sizehint!(Vector{Int}(), nrow(df))) : nothing
+        RowIndexMap(sizehint!(Vector{Int}(), size(df, 1)),
+                    sizehint!(Vector{Int}(), size(df, 1))) : nothing
     to_bimap(x::RowIndexMap) = x
     to_bimap(::Void) = RowIndexMap(Vector{Int}(), Vector{Int}())

     # init maps as requested
     left_ixs = init_map(left_table, map_left)
     leftonly_ixs = init_map(left_table, map_leftonly)
     right_ixs = init_map(right_table, map_right)
-    rightonly_mask = map_rightonly ? fill(true, nrow(right_table)) : nothing
+    rightonly_mask = map_rightonly ? fill(true, size(right_table, 1)) : nothing
     update_row_maps!(left_table, right_table, right_dict, left_ixs, leftonly_ixs, right_ixs, rightonly_mask)
     if map_rightonly
         rightonly_orig_ixs = find(rightonly_mask)
@@ -276,10 +276,10 @@ function Base.join(df1::AbstractDataFrame,
         dfr_on_grp = group_rows(joiner.dfr_on)
         # iterate over left rows and leave those found in right
         left_ixs = Vector{Int}()
-        sizehint!(left_ixs, nrow(joiner.dfl))
-        dfr_on_grp_cols = ntuple(i -> dfr_on_grp.df[i], ncol(dfr_on_grp.df))
-        dfl_on_cols = ntuple(i -> joiner.dfl_on[i], ncol(joiner.dfl_on))
-        @inbounds for l_ix in 1:nrow(joiner.dfl_on)
+        sizehint!(left_ixs, size(joiner.dfl, 1))
+        dfr_on_grp_cols = ntuple(i -> dfr_on_grp.df[i], size(dfr_on_grp.df, 2))
+        dfl_on_cols = ntuple(i -> joiner.dfl_on[i], size(joiner.dfl_on, 2))
+        @inbounds for l_ix in 1:size(joiner.dfl_on, 1)
             if findrow(dfr_on_grp, joiner.dfl_on, dfr_on_grp_cols, dfl_on_cols, l_ix) != 0
                 push!(left_ixs, l_ix)
             end
@@ -290,10 +290,10 @@
         dfr_on_grp = group_rows(joiner.dfr_on)
         # iterate over left rows and leave those not found in right
         leftonly_ixs = Vector{Int}()
-        sizehint!(leftonly_ixs, nrow(joiner.dfl))
-        dfr_on_grp_cols = ntuple(i -> dfr_on_grp.df[i], ncol(dfr_on_grp.df))
-        dfl_on_cols = ntuple(i -> joiner.dfl_on[i], ncol(joiner.dfl_on))
-        @inbounds for l_ix in 1:nrow(joiner.dfl_on)
+        sizehint!(leftonly_ixs, size(joiner.dfl, 1))
+        dfr_on_grp_cols = ntuple(i -> dfr_on_grp.df[i], size(dfr_on_grp.df, 2))
+        dfl_on_cols = ntuple(i -> joiner.dfl_on[i], size(joiner.dfl_on, 2))
+        @inbounds for l_ix in 1:size(joiner.dfl_on, 1)
             if findrow(dfr_on_grp, joiner.dfl_on, dfr_on_grp_cols, dfl_on_cols, l_ix) == 0
                 push!(leftonly_ixs, l_ix)
             end
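
Both join branches above share one pattern: preallocate an index vector with `sizehint!` using the row count, then scan rows by integer index and keep the matches. A self-contained sketch of that pattern, with a hypothetical keep-row predicate standing in for `findrow`:

```julia
using DataFrames

df = DataFrame(k = [1, 2, 2, 3])

# Preallocate to the row count, then push matching row indices,
# mirroring how the :semi/:anti branches collect left_ixs/leftonly_ixs.
ixs = Vector{Int}()
sizehint!(ixs, size(df, 1))
for l_ix in 1:size(df, 1)
    df[l_ix, :k] == 2 && push!(ixs, l_ix)  # hypothetical match test
end
ixs  # [2, 3]
```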
16 changes: 8 additions & 8 deletions src/abstractdataframe/reshape.jl
@@ -81,7 +81,7 @@ function stack(df::AbstractDataFrame, measure_vars::Vector{Int},
     cnames = names(df)[id_vars]
     insert!(cnames, 1, value_name)
     insert!(cnames, 1, variable_name)
-    DataFrame(Any[repeat(_names(df)[measure_vars], inner=nrow(df)), # variable
+    DataFrame(Any[repeat(_names(df)[measure_vars], inner=size(df, 1)), # variable
                   vcat([df[c] for c in measure_vars]...), # value
                   [repeat(df[c], outer=N) for c in id_vars]...], # id_var columns
               cnames)
@@ -114,7 +114,7 @@ numeric_vars(df::AbstractDataFrame) =
 function stack(df::AbstractDataFrame, measure_vars = numeric_vars(df);
                variable_name::Symbol=:variable, value_name::Symbol=:value)
     mv_inds = index(df)[measure_vars]
-    stack(df, mv_inds, setdiff(1:ncol(df), mv_inds);
+    stack(df, mv_inds, setdiff(1:size(df, 2), mv_inds);
           variable_name=variable_name, value_name=value_name)
 end

@@ -129,7 +129,7 @@ end
 function melt(df::AbstractDataFrame, id_vars;
               variable_name::Symbol=:variable, value_name::Symbol=:value)
     id_inds = index(df)[id_vars]
-    stack(df, setdiff(1:ncol(df), id_inds), id_inds;
+    stack(df, setdiff(1:size(df, 2), id_inds), id_inds;
           variable_name=variable_name, value_name=value_name)
 end
 function melt(df::AbstractDataFrame, id_vars, measure_vars;
@@ -200,7 +200,7 @@ function unstack(df::AbstractDataFrame, rowkey::Int, colkey::Int, value::Int)
     Ncol = length(keycol.pool)
     payload = DataFrame(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol)))
     nowarning = true
-    for k in 1:nrow(df)
+    for k in 1:size(df, 1)
         j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
         i = Int(CategoricalArrays.order(refkeycol.pool)[refkeycol.refs[k]])
         if i > 0 && j > 0
@@ -237,7 +237,7 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int)
     Ncol = length(levels(keycol))
     df2 = DataFrame(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol)))
     nowarning = true
-    for k in 1:nrow(df)
+    for k in 1:size(df, 1)
         j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
         i = rowkey[k]
         if i > 0 && j > 0
@@ -451,7 +451,7 @@ function stackdf(df::AbstractDataFrame, measure_vars::Vector{Int},
     cnames = names(df)[id_vars]
     insert!(cnames, 1, value_name)
     insert!(cnames, 1, variable_name)
-    DataFrame(Any[RepeatedVector(_names(df)[measure_vars], nrow(df), 1), # variable
+    DataFrame(Any[RepeatedVector(_names(df)[measure_vars], size(df, 1), 1), # variable
                   StackedVector(Any[df[:,c] for c in measure_vars]), # value
                   [RepeatedVector(df[:,c], 1, N) for c in id_vars]...], # id_var columns
               cnames)
@@ -479,7 +479,7 @@ end
 function stackdf(df::AbstractDataFrame, measure_vars = numeric_vars(df);
                  variable_name::Symbol=:variable, value_name::Symbol=:value)
     m_inds = index(df)[measure_vars]
-    stackdf(df, m_inds, setdiff(1:ncol(df), m_inds);
+    stackdf(df, m_inds, setdiff(1:size(df, 2), m_inds);
             variable_name=variable_name, value_name=value_name)
 end

@@ -489,7 +489,7 @@ A stacked view of a DataFrame (long format); see `stackdf`
 function meltdf(df::AbstractDataFrame, id_vars; variable_name::Symbol=:variable,
                 value_name::Symbol=:value)
     id_inds = index(df)[id_vars]
-    stackdf(df, setdiff(1:ncol(df), id_inds), id_inds;
+    stackdf(df, setdiff(1:size(df, 2), id_inds), id_inds;
             variable_name=variable_name, value_name=value_name)
 end
 function meltdf(df::AbstractDataFrame, id_vars, measure_vars;
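
In `stack`/`melt` (and the `stackdf`/`meltdf` view variants), whichever column set the caller names, the other set is derived as its complement over `1:size(df, 2)`, as the hunks above show. A short sketch of that derivation (the column choices are illustrative):

```julia
using DataFrames

df = DataFrame(id = 1:3, x = rand(3), y = rand(3))

measure_inds = [2, 3]                           # :x and :y
id_inds = setdiff(1:size(df, 2), measure_inds)  # [1], i.e. :id

long = stack(df, measure_inds, id_inds)         # one row per (id, variable)
```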
4 changes: 2 additions & 2 deletions src/abstractdataframe/sort.jl
@@ -61,7 +61,7 @@ immutable DFPerm{O<:Union{Ordering, AbstractVector}, DF<:AbstractDataFrame} <: O
 end

 function DFPerm{O<:Ordering, DF<:AbstractDataFrame}(ords::AbstractVector{O}, df::DF)
-    if length(ords) != ncol(df)
+    if length(ords) != size(df, 2)
         error("DFPerm: number of column orderings does not equal the number of DataFrame columns")
     end
     DFPerm{typeof(ords), DF}(ords, df)
@@ -77,7 +77,7 @@ Base.@propagate_inbounds Base.getindex(o::DFPerm, i::Int, j::Int) = o.df[i, j]
 Base.@propagate_inbounds Base.getindex(o::DFPerm, a::DataFrameRow, j::Int) = a[j]

 function Sort.lt(o::DFPerm, a, b)
-    @inbounds for i = 1:ncol(o.df)
+    @inbounds for i = 1:size(o.df, 2)
         ord = col_ordering(o, i)
         va = o[a, i]
         vb = o[b, i]
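
`Sort.lt` above walks the columns left to right and stops at the first unequal pair. A simplified, ordering-free sketch of that comparison (`rows_lt` is a hypothetical helper, assuming `isless` is defined for every cell):

```julia
using DataFrames

# Lexicographic row comparison in the spirit of Sort.lt(o::DFPerm, a, b):
# scan columns until one row is strictly smaller in some column.
function rows_lt(df::AbstractDataFrame, a::Int, b::Int)
    for i in 1:size(df, 2)
        va, vb = df[a, i], df[b, i]
        isless(va, vb) && return true
        isless(vb, va) && return false
    end
    return false  # all columns equal
end

df = DataFrame(x = [1, 1, 2], y = [3, 1, 0])
rows_lt(df, 2, 1)  # true: tie on x, row 2 is smaller on y
```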
