Skip to content
This repository has been archived by the owner on May 5, 2019. It is now read-only.

Commit

Permalink
Resolve conflicts and get tests passing
Browse files Browse the repository at this point in the history
  • Loading branch information
quinnj committed May 17, 2017
2 parents 6d95b10 + e9d6766 commit 6191aa4
Show file tree
Hide file tree
Showing 11 changed files with 228 additions and 240 deletions.
203 changes: 81 additions & 122 deletions src/datatable/datatable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,117 +74,100 @@ type DataTable <: AbstractDataTable
colindex::Index

function DataTable(columns::Vector{Any}, colindex::Index)
ncols = length(columns)
if ncols > 1
nrows = length(columns[1])
equallengths = true
for i in 2:ncols
equallengths &= length(columns[i]) == nrows
if length(columns) == length(colindex) == 0
return new(Vector{Any}(0), Index())
elseif length(columns) != length(colindex)
throw(DimensionMismatch("Number of columns ($(length(columns))) and number of column names ($(length(colindex))) are not equal"))
end
lengths = [isa(col, AbstractArray) ? length(col) : 1 for col in columns]
minlen, maxlen = extrema(lengths)
if minlen == 0 && maxlen == 0
return new(columns, colindex)
elseif minlen != maxlen || minlen == maxlen == 1
# recycle scalars
for i in 1:length(columns)
isa(columns[i], AbstractArray) && continue
columns[i] = fill(columns[i], maxlen)
lengths[i] = maxlen
end
if !equallengths
msg = "All columns in a DataTable must be the same length"
throw(ArgumentError(msg))
uls = unique(lengths)
if length(uls) != 1
strnames = string.(names(colindex))
estrings = ["column length $u for column(s) " *
join(strnames[lengths .== u], ", ", " and ") for (i, u) in enumerate(uls)]
throw(DimensionMismatch(join(estrings, " is incompatible with ", ", and is incompatible with ")))
end
end
if length(colindex) != ncols
msg = "Columns and column index must be the same length"
throw(ArgumentError(msg))
for (i, c) in enumerate(columns)
if isa(c, Range)
columns[i] = collect(c)
elseif !isa(c, AbstractVector)
throw(DimensionMismatch("columns must be 1-dimensional"))
end
end
new(columns, colindex)
end
end

# Build a DataTable from keyword arguments: each keyword name becomes a column
# name and each keyword value the corresponding column.
#
# All names and values are collected first and handed to the
# (columns, Index) constructor in one call, so that constructor sees every
# column together. The earlier body inserted columns one at a time with
# `result[k] = v` and returned before this single-pass construction could run,
# leaving it unreachable.
function DataTable(; kwargs...)
    colnames = Symbol[k for (k, v) in kwargs]
    columns = Any[v for (k, v) in kwargs]
    return DataTable(columns, Index(colnames))
end

# Build a DataTable from a vector of columns and, optionally, their names.
# When `cnames` is omitted, names come from `gennames(length(columns))`.
# Both arguments are converted to concrete vectors (`Vector{Any}` and
# `Vector{Symbol}`) before being passed on.
function DataTable(columns::AbstractVector,
                   cnames::AbstractVector{Symbol} = gennames(length(columns)))
    column_store = convert(Vector{Any}, columns)
    colindex = Index(convert(Vector{Symbol}, cnames))
    return DataTable(column_store, colindex)
end


# Initialize an empty DataTable of arbitrary size: `ncols` columns, each built
# as `NullableArray(t, nrows)`, named by `gennames(ncols)`.
function DataTable(t::Type, nrows::Integer, ncols::Integer)
    columns = Vector{Any}(ncols)
    for slot in eachindex(columns)
        columns[slot] = NullableArray(t, nrows)
    end
    return DataTable(columns, Index(gennames(ncols)))
end

# Initialize an empty DataTable with specific eltypes and names
function DataTable(column_eltypes::Vector, cnames::Vector, nrows::Integer)
p = length(column_eltypes)
columns = Vector{Any}(p)
for j in 1:p
columns[j] = NullableArray(column_eltypes[j], nrows)
# Create a DataTable with `nrows` rows whose columns have the element types
# listed in `column_eltypes` and the names in `cnames`.
#
# The container built for each column depends on its element type:
#   - a Nullable whose eltype is a CategoricalValue -> NullableCategoricalArray
#   - any other Nullable                            -> NullableVector
#   - a CategoricalValue                            -> CategoricalVector
#   - anything else                                 -> Vector
function DataTable{T<:Type}(column_eltypes::AbstractVector{T},
                            cnames::AbstractVector{Symbol}, nrows::Integer)
    numcols = length(column_eltypes)
    columns = Vector{Any}(numcols)
    for (j, elty) in enumerate(column_eltypes)
        if elty <: Nullable
            inner = eltype(elty)
            if inner <: CategoricalValue
                columns[j] = NullableCategoricalArray{inner}(nrows)
            else
                columns[j] = NullableVector{inner}(nrows)
            end
        elseif elty <: CategoricalValue
            columns[j] = CategoricalVector{elty}(nrows)
        else
            columns[j] = Vector{elty}(nrows)
        end
    end
    # `cnames` may be any AbstractVector{Symbol}; convert it to a
    # Vector{Symbol} before building the Index. A single return replaces the
    # previous pair, whose first statement skipped this conversion and made
    # the converting one unreachable.
    return DataTable(columns, Index(convert(Vector{Symbol}, cnames)))
end

# Initialize an empty DataTable with specific eltypes and names, choosing per
# column whether a nominal array should be created: column `j` is a
# `NullableCategoricalArray` when `nominal[j]` is true and a `NullableArray`
# otherwise.
function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol},
                   nominal::Vector{Bool}, nrows::Integer)
    ncols = length(column_eltypes)
    columns = Vector{Any}(ncols)
    for (j, elty) in enumerate(column_eltypes)
        columns[j] = nominal[j] ? NullableCategoricalArray{elty}(nrows) :
                                  NullableArray{elty}(nrows)
    end
    return DataTable(columns, Index(cnames))
end

# Initialize an empty DataTable with specific eltypes
function DataTable(column_eltypes::Vector, nrows::Integer)
p = length(column_eltypes)
columns = Vector{Any}(p)
cnames = gennames(p)
for j in 1:p
columns[j] = NullableArray{column_eltypes[j]}(nrows)
# Create a DataTable of `nrows` rows from element types and names, turning
# every column flagged in `nominal` into a categorical column.
#
# For a flagged column the element type is rewritten before delegating to the
# (column_eltypes, cnames, nrows) method: a `Nullable` type becomes
# `Nullable{CategoricalValue{eltype(...)}}`, any other type `X` becomes
# `CategoricalValue{X}`.
function DataTable{T<:Type}(column_eltypes::AbstractVector{T},
                            cnames::AbstractVector{Symbol},
                            nominal::Vector{Bool}, nrows::Integer)
    # Copy into a fresh Vector{Type}. It can hold the CategoricalValue types
    # written below (a Vector{DataType} could not), and it never aliases the
    # caller's vector: `convert(Vector{Type}, x)` hands back `x` itself when
    # `x` already is a Vector{Type}, so the loop would overwrite caller data.
    updated_types = Type[elty for elty in column_eltypes]
    for i in eachindex(nominal)
        nominal[i] || continue
        elty = updated_types[i]
        if elty <: Nullable
            updated_types[i] = Nullable{CategoricalValue{eltype(elty)}}
        else
            updated_types[i] = CategoricalValue{elty}
        end
    end
    # Single exit point: the stray `return DataTable(columns, Index(cnames))`
    # that preceded this line referenced a `columns` variable this method
    # never defines.
    return DataTable(updated_types, cnames, nrows)
end

# Initialize from a Vector of Associatives (aka list of dicts)
function DataTable{D <: Associative}(ds::Vector{D})
ks = Set()
for d in ds
union!(ks, keys(d))
end
DataTable(ds, [ks...])
# Initialize an empty DataTable of arbitrary size: `ncols` columns that all
# share element type `t`, with `nrows` rows. The repeated type vector is
# passed to the (column_eltypes, nrows) method.
function DataTable(t::Type, nrows::Integer, ncols::Integer)
    column_eltypes = fill(t, ncols)
    return DataTable(column_eltypes, nrows)
end

# Initialize from a Vector of Associatives (aka list of dicts)
function DataTable{D <: Associative}(ds::Vector{D}, ks::Vector)
#get column eltypes
col_eltypes = Type[@compat(Union{}) for _ = 1:length(ks)]
for d in ds
for (i,k) in enumerate(ks)
if haskey(d, k) && !_isnull(d[k])
col_eltypes[i] = promote_type(col_eltypes[i], typeof(d[k]))
end
end
end
col_eltypes[col_eltypes .== @compat(Union{})] = Any

# create empty DataTable, and fill
dt = DataTable(col_eltypes, ks, length(ds))
for (i,d) in enumerate(ds)
for (j,k) in enumerate(ks)
dt[i,j] = get(d, k, Nullable())
end
end

dt
# Initialize an empty DataTable with specific eltypes; column names are
# generated by `gennames` for as many columns as there are element types.
function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, nrows::Integer)
    cnames = gennames(length(column_eltypes))
    return DataTable(column_eltypes, cnames, nrows)
end

##############################################################################
Expand Down Expand Up @@ -363,24 +346,20 @@ function insert_multiple_entries!{T <: Real}(dt::DataTable,
end
end

# Convert a column to a Nullable-aware container. Arrays whose elements are
# already `Nullable` are returned as-is; a CategoricalArray is wrapped in a
# NullableCategoricalArray; any other array is wrapped in a NullableArray.
function upgrade_vector{T<:Nullable}(v::AbstractArray{T})
    return v
end

function upgrade_vector(v::CategoricalArray)
    return NullableCategoricalArray(v)
end

function upgrade_vector(v::AbstractArray)
    return NullableArray(v)
end

# Array values are never broadcast as scalars: this method always throws an
# ArgumentError.
function upgrade_scalar(dt::DataTable, v::AbstractArray)
    throw(ArgumentError("setindex!(::DataTable, ...) only broadcasts scalars, not arrays"))
end
# Expand the scalar `v` into a column for `dt`: one element when `dt` has no
# columns, otherwise `nrow(dt)` elements.
#
# The result is the plain array produced by `fill`. The previous body also
# built `NullableArray(fill(v, n))` and threw the value away; that wasted
# allocation has been removed without changing what is returned.
function upgrade_scalar(dt::DataTable, v::Any)
    n = ncol(dt) == 0 ? 1 : nrow(dt)
    return fill(v, n)
end

# dt[SingleColumnIndex] = AbstractVector
#
# Stores `v` as the column at `col_ind` via `insert_single_column!` and
# returns that call's result. The previous body first inserted
# `upgrade_vector(v)` and then immediately inserted `v` at the same index, so
# the wrapped copy was built only to be replaced; the redundant first insert
# is removed.
function Base.setindex!(dt::DataTable,
                        v::AbstractVector,
                        col_ind::ColumnIndex)
    return insert_single_column!(dt, v, col_ind)
end

# dt[SingleColumnIndex] = Single Item (EXPANDS TO NROW(DT) if NCOL(DT) > 0)
Expand Down Expand Up @@ -417,9 +396,8 @@ end
# dt[MultiColumnIndex] = AbstractVector
#
# Assigns the same vector `v` to every column named in `col_inds` and returns
# `dt`. The previous body assigned `upgrade_vector(v)` to each column and then
# overwrote it with `v` in the same iteration; only the final assignment is
# kept, and the now-unused wrapped copy is no longer created.
function Base.setindex!{T <: ColumnIndex}(dt::DataTable,
                                          v::AbstractVector,
                                          col_inds::AbstractVector{T})
    for col_ind in col_inds
        dt[col_ind] = v
    end
    return dt
end
Expand Down Expand Up @@ -820,7 +798,7 @@ function Base.append!(dt1::DataTable, dt2::AbstractDataTable)
return dt1
end

function Base.convert(::Type{DataTable}, A::Matrix)
function Base.convert(::Type{DataTable}, A::AbstractMatrix)
n = size(A, 2)
cols = Vector{Any}(n)
for i in 1:n
Expand All @@ -829,35 +807,16 @@ function Base.convert(::Type{DataTable}, A::Matrix)
return DataTable(cols, Index(gennames(n)))
end

# Build a DataTable from the entries of `d` listed in `dnames`, in that order.
# Each value is wrapped in a NullableArray and each key is turned into a
# Symbol column name. An empty `dnames` yields `DataTable()`.
# Throws ArgumentError when any value's length differs from the first one's.
function _datatable_from_associative(dnames, d::Associative)
    ncols = length(dnames)
    if ncols == 0
        return DataTable()
    end
    columns = Vector{Any}(ncols)
    colnames = Vector{Symbol}(ncols)
    # The first listed entry fixes the row count every other entry must match.
    nrows = length(d[dnames[1]])
    for j in 1:ncols
        key = dnames[j]
        coldata = d[key]
        length(coldata) == nrows ||
            throw(ArgumentError("All columns in Dict must have the same length"))
        columns[j] = NullableArray(coldata)
        colnames[j] = Symbol(key)
    end
    return DataTable(columns, Index(colnames))
end

# Convert any Associative to a DataTable, taking the columns in the order in
# which `keys(d)` iterates.
function Base.convert(::Type{DataTable}, d::Associative)
    return _datatable_from_associative(collect(keys(d)), d)
end

# A Dict is not sorted or otherwise ordered, and it's nicer to return a
# DataTable which is ordered in some way, so columns are taken in sorted key
# order.
#
# The values of `d` are passed to the DataTable constructor as they are
# stored. Each key goes through `Symbol(...)` so that non-Symbol keys still
# produce column names.
#
# NOTE(review): the previous body returned through
# `_datatable_from_associative` and left the direct construction below
# unreachable; the direct construction is kept on the assumption that it is
# the intended path -- confirm against the "associative" constructor test.
function Base.convert(::Type{DataTable}, d::Dict)
    colnames = sort!(collect(keys(d)))
    colindex = Index(Symbol[Symbol(k) for k in colnames])
    columns = Any[d[c] for c in colnames]
    return DataTable(columns, colindex)
end


Expand Down
15 changes: 9 additions & 6 deletions test/cat.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ module TestCat
@test vcat(null_dt, null_dt) == DataTable()
@test_throws ArgumentError vcat(null_dt, dt)
@test_throws ArgumentError vcat(dt, null_dt)
@test eltypes(vcat(dt, dt)) == [Nullable{Float64}, Nullable{Float64}, Nullable{Int}]
@test eltypes(vcat(dt, dt)) == Type[Float64, Float64, Int]
@test size(vcat(dt, dt)) == (size(dt,1)*2, size(dt,2))
@test eltypes(vcat(dt, dt, dt)) == [Nullable{Float64}, Nullable{Float64}, Nullable{Int}]
@test eltypes(vcat(dt, dt, dt)) == Type[Float64,Float64,Int]
@test size(vcat(dt, dt, dt)) == (size(dt,1)*3, size(dt,2))

alt_dt = deepcopy(dt)
Expand All @@ -110,12 +110,12 @@ module TestCat
@test names(dt4) == names(dtr)
@test isequal(dtr, [dt4; dt4])

@test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Nullable{Float64}]
@test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == [Nullable{Float64}]
@test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == Type[Float64]
@test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == Type[Nullable{Float64}]

# Minimal container type promotion
dta = DataTable(a = CategoricalArray([1, 2, 2]))
dtb = DataTable(a = CategoricalArray([2, 3, 4]))
dta = DataTable(a = NullableCategoricalArray([1, 2, 2]))
dtb = DataTable(a = NullableCategoricalArray([2, 3, 4]))
dtc = DataTable(a = NullableArray([2, 3, 4]))
dtd = DataTable(Any[2:4], [:a])
dtab = vcat(dta, dtb)
Expand Down Expand Up @@ -249,4 +249,7 @@ module TestCat
err = @test_throws ArgumentError vcat(dt1, dt2, dt3, dt4, dt1, dt2, dt3, dt4, dt1, dt2, dt3, dt4)
@test err.value.msg == "column(s) E and F are missing from argument(s) 1, 5 and 9, column(s) B are missing from argument(s) 2, 6 and 10, and column(s) F are missing from argument(s) 3, 7 and 11"
end
x = view(DataTable(A = NullableArray(1:3)), 2)
y = DataTable(A = NullableArray(4:5))
@test isequal(vcat(x, y), DataTable(A = NullableArray([2, 4, 5])))
end
58 changes: 45 additions & 13 deletions test/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,36 +18,68 @@ module TestConstructors

@test isequal(dt, DataTable(Any[NullableCategoricalVector(zeros(3)),
NullableCategoricalVector(ones(3))]))
@test isequal(dt, DataTable(x1 = [0.0, 0.0, 0.0],
x2 = [1.0, 1.0, 1.0]))
@test isequal(dt, DataTable(x1 = NullableArray([0.0, 0.0, 0.0]),
x2 = NullableArray([1.0, 1.0, 1.0])))

dt2 = convert(DataTable, [0.0 1.0;
0.0 1.0;
0.0 1.0])
dt2 = convert(DataTable, NullableArray([0.0 1.0;
0.0 1.0;
0.0 1.0]))
names!(dt2, [:x1, :x2])
@test isequal(dt[:x1], NullableArray(dt2[:x1]))
@test isequal(dt[:x2], NullableArray(dt2[:x2]))

@test isequal(dt, DataTable(x1 = [0.0, 0.0, 0.0],
x2 = [1.0, 1.0, 1.0]))
@test isequal(dt, DataTable(x1 = [0.0, 0.0, 0.0],
x2 = [1.0, 1.0, 1.0],
x3 = [2.0, 2.0, 2.0])[[:x1, :x2]])
@test isequal(dt, DataTable(x1 = NullableArray([0.0, 0.0, 0.0]),
x2 = NullableArray([1.0, 1.0, 1.0])))
@test isequal(dt, DataTable(x1 = NullableArray([0.0, 0.0, 0.0]),
x2 = NullableArray([1.0, 1.0, 1.0]),
x3 = NullableArray([2.0, 2.0, 2.0]))[[:x1, :x2]])

dt = DataTable(Int, 2, 2)
dt = DataTable(Nullable{Int}, 2, 2)
@test size(dt) == (2, 2)
@test eltypes(dt) == [Nullable{Int}, Nullable{Int}]

dt = DataTable([Int, Float64], [:x1, :x2], 2)
dt = DataTable([Nullable{Int}, Nullable{Float64}], [:x1, :x2], 2)
@test size(dt) == (2, 2)
@test eltypes(dt) == [Nullable{Int}, Nullable{Float64}]

@test isequal(dt, DataTable([Int, Float64], 2))
@test isequal(dt, DataTable([Nullable{Int}, Nullable{Float64}], 2))

@test_throws BoundsError SubDataTable(DataTable(A=1), 0)
@test_throws BoundsError SubDataTable(DataTable(A=1), 0)
@test isequal(SubDataTable(DataTable(A=1), 1), DataTable(A=1))
@test isequal(SubDataTable(DataTable(A=1:10), 1:4), DataTable(A=1:4))
@test isequal(view(SubDataTable(DataTable(A=1:10), 1:4), 2), DataTable(A=2))
@test isequal(view(SubDataTable(DataTable(A=1:10), 1:4), [true, true, false, false]), DataTable(A=1:2))

@test DataTable(a=1, b=1:2) == DataTable(a=[1,1], b=[1,2])

# A DataTable built from a Dict of ranges must equal the one built from the
# same ranges as keyword arguments, and report Int element types.
@testset "associative" begin
dt = DataTable(Dict(:A => 1:3, :B => 4:6))
@test dt == DataTable(A = 1:3, B = 4:6)
@test eltypes(dt) == [Int, Int]
end

# A scalar passed next to a longer column is repeated to that column's length,
# whichever argument position it appears in.
@testset "recyclers" begin
@test DataTable(a = 1:5, b = 1) == DataTable(a = collect(1:5), b = fill(1, 5))
@test DataTable(a = 1, b = 1:5) == DataTable(a = fill(1, 5), b = collect(1:5))
end

# Each of these constructions must be rejected with DimensionMismatch.
@testset "constructor errors" begin
# scalar column next to an empty vector
@test_throws DimensionMismatch DataTable(a=1, b=[])
# one column supplied for two column names
@test_throws DimensionMismatch DataTable(Any[collect(1:10)], DataTables.Index([:A, :B]))
# matrices are not accepted as columns, even with a single matrix column
@test_throws DimensionMismatch DataTable(A = rand(2,2))
@test_throws DimensionMismatch DataTable(A = rand(2,1))
end

# Checks the concrete container type of each stored column: ranges are held
# as Array{Int,1}, an assigned NullableArray stays a NullableArray, and an
# assigned scalar is held as a plain Array of its own type.
@testset "column types" begin
dt = DataTable(A = 1:3, B = 2:4, C = 3:5)
answer = [Array{Int,1}, Array{Int,1}, Array{Int,1}]
@test map(typeof, dt.columns) == answer
dt[:D] = NullableArray([4, 5, Nullable()])
push!(answer, NullableArray{Int,1})
@test map(typeof, dt.columns) == answer
dt[:E] = 'c'
push!(answer, Array{Char,1})
@test map(typeof, dt.columns) == answer
end
end

0 comments on commit 6191aa4

Please sign in to comment.