Skip to content
This repository has been archived by the owner on May 5, 2019. It is now read-only.

Commit

Permalink
incorporate edits suggested during review
Browse files Browse the repository at this point in the history
  • Loading branch information
cjprybol committed Mar 13, 2017
1 parent f5a53a1 commit 2c95f13
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 64 deletions.
10 changes: 7 additions & 3 deletions src/abstractdatatable/abstractdatatable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,7 @@ end
Convert columns with a `Nullable` element type without any null values
to a non-`Nullable` equivalent array type. The table `dt` is modified in place.
`NullableVectors` are aliased to their `values` field.

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

values is an internal field; shouldn't be mentioned here. Just say that the new vectors may alias with the new ones or that it is not guaranteed that a copy will be returned.

# Examples
Expand Down Expand Up @@ -805,12 +806,12 @@ julia> eltypes(dt)
Int64
```
See also [`denullify`](@ref) & [`nullify!`](@ref).
See also [`denullify`](@ref) and [`nullify!`](@ref).
"""
function denullify!(dt::AbstractDataTable)
for i in 1:size(dt,2)
if !anynull(dt[i])
dt[i] = dropnull(dt[i])
dt[i] = dropnull!(dt[i])
end
end
dt
Expand Down Expand Up @@ -889,11 +890,14 @@ See also [`nullify`](@ref) & [`denullify!`](@ref).
"""
function nullify!(dt::AbstractDataTable)
for i in 1:size(dt,2)
dt[i] = NullableArray(dt[i])
dt[i] = nullify(dt[i])
end
dt
end

nullify(x::AbstractArray) = convert(NullableArray, x)
nullify(x::AbstractCategoricalArray) = convert(NullableCategoricalArray, x)

"""
nullify(dt::AbstractDataTable)
Expand Down
2 changes: 1 addition & 1 deletion src/abstractdatatable/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function printtable(io::IO,
if !isnull(dt[j][i])
if ! (etypes[j] <: Real)
print(io, quotemark)
x = isa(dt[i, j], Nullable) ? get(dt[i, j]) : dt[i, j]
x = isa(dt[i, j], Nullable) ? _unsafe_get(dt[i, j]) : dt[i, j]
escapedprint(io, x, quotestr)
print(io, quotemark)
else
Expand Down
19 changes: 16 additions & 3 deletions src/abstractdatatable/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@
## Join / merge
##

# Like similar, but returns a nullable array
similar_nullable{T}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
NullableArray(T, dims)

similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
NullableArray(eltype(T), dims)

similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
NullableCategoricalArray(T, dims)

similar_nullable(dt::AbstractDataTable, dims::Int) =
DataTable(Any[similar_nullable(x, dims) for x in columns(dt)], copy(index(dt)))

# helper structure for DataTables joining
immutable DataTableJoiner{DT1<:AbstractDataTable, DT2<:AbstractDataTable}
dtl::DT1
Expand Down Expand Up @@ -64,9 +77,9 @@ function compose_joined_table(joiner::DataTableJoiner,
end
all_orig_right_ixs = vcat(right_ixs.orig, rightonly_ixs.orig)
resizelen = length(all_orig_right_ixs)+length(leftonly_ixs)
rightcols = Any[length(col[all_orig_right_ixs]) >= resizelen ?
resize!(col[all_orig_right_ixs], resizelen)[right_perm] :
NullableArray(vcat(col[all_orig_right_ixs], fill(Nullable(), resizelen - length(col[all_orig_right_ixs]))))[right_perm]
rightcols = Any[length(all_orig_right_ixs) >= resizelen ?
resize!(col[all_orig_right_ixs], resizelen)[right_perm] :
copy!(similar_nullable(col[all_orig_right_ixs], resizelen), col[all_orig_right_ixs])[right_perm]

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

I don't think it's a good idea to change the return type depending on something as subtle as whether the vector needs to be resized or not. Better always allocate an array of size resizelen using similar_nullable, and copy the data into it. That will also make the code simpler.

Also, don't call col[all_orig_right_ixs] inside similar_nullable: pass col instead to avoid allocating a copy just to discard it.

This comment has been minimized.

Copy link
@cjprybol

cjprybol Mar 15, 2017

Author Contributor

This can definitely be improved. How about this? Comments for reasoning are included here, but not included in the code I'm about to push (can be added there if you'd like)

# length(all_orig_right_ixs) > resizelen is impossible so change this to ==
rightcols = Any[length(all_orig_right_ixs) == resizelen ?
                # no need to resize at all if the col is the right size
                col[all_orig_right_ixs][right_perm] :
                # if the column is smaller than resizelen, then we know we will be introducing
                # missing data so create a similar nullable and copy the data into it
                # but don't create a nullable array if it's not necessary
                copy!(similar_nullable(col, resizelen), col[all_orig_right_ixs])[right_perm]
                for col in columns(dtr_noon)]

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

That code still introduces a type instability depending on the length. That's why I think you need to allocate the array first and then fill it.

This comment has been minimized.

Copy link
@cjprybol

cjprybol Mar 15, 2017

Author Contributor

I would definitely not like to introduce performance regressions, but I'm not following why the type instability would be an issue here. The result of this is going into a Vector{Any}, which will type-destabilize anything after this point anyway, right? Another improvement would be that we don't need to perform the length(all_orig_right_ixs) == resizelen check on each loop of for col in columns(dtr_noon) since they all should be the same length. We could move the type determination out of this array comprehension. Would that address the type instability?

This comment has been minimized.

Copy link
@cjprybol

cjprybol Mar 15, 2017

Author Contributor

I also wouldn't like to automatically promote to NullableArrays during joins, that defeats the point of this PR

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

I'm not so much concerned with performance than with usability: you can't work with a Nullable the same way as with a scalar, so that shouldn't change without explicit request. I think what we should do is that joins that can produce null values should always return nullable columns: if you request such a join, you expect missing values to be possible. Other kinds of joins (like inner joins) should only return nullable columns when one of the inputs is nullable.

This comment has been minimized.

Copy link
@cjprybol

cjprybol Mar 15, 2017

Author Contributor

Ok, I think I understand what you're saying. Let's say you do several joins on several different combinations of tables. Some of those joins produce missing entries and others don't. You'd have to write different code to handle the null-free subset than the null-containing subset of those results. I agree, that would be annoying and a usability issue. But if we use similar_nullable here in all cases then there are scenarios where complete data gets converted to NullableArrays. Here's a case from the tests

julia> dt = DataTable(Name = Nullable{String}["A", "B", "C"],
                      Mass = [1.5, 2.2, 1.1])
3×2 DataTables.DataTable
│ Row │ Name │ Mass │
├─────┼──────┼──────┤
│ 1   │ A    │ 1.5  │
│ 2   │ B    │ 2.2  │
│ 3   │ C    │ 1.1  │

julia> dt2 = DataTable(Name = Nullable{String}["A", "B", "C", "A"],
                       Quantity = [3, 3, 2, 4])
4×2 DataTables.DataTable
│ Row │ Name │ Quantity │
├─────┼──────┼──────────┤
│ 1   │ A    │ 3        │
│ 2   │ B    │ 3        │
│ 3   │ C    │ 2        │
│ 4   │ A    │ 4        │

julia> dt3 = join(dt2, dt, on=:Name, kind=:left)
4×3 DataTables.DataTable
│ Row │ Name │ Quantity │ Mass │
├─────┼──────┼──────────┼──────┤
│ 1   │ A    │ 31.5  │
│ 2   │ B    │ 32.2  │
│ 3   │ C    │ 21.1  │
│ 4   │ A    │ 41.5  │

julia> eltypes(dt)
2-element Array{Type,1}:
 Nullable{String}
 Float64

julia> eltypes(dt2)
2-element Array{Type,1}:
 Nullable{String}
 Int64

julia> eltypes(dt3)
3-element Array{Type,1}:
 Nullable{String}
 Int64
 Nullable{Float64}

This function is used by :inner, :left, and :outer joins and so :inner and :left will both convert Arrays to NullableArrays. Here's another example with :inner

julia> dt1 = DataTable(name = [:bob, :sue, :xavier], age = [25, 84, 41])
3×2 DataTables.DataTable
│ Row │ name   │ age │
├─────┼────────┼─────┤
│ 1   │ bob    │ 25  │
│ 2   │ sue    │ 84  │
│ 3   │ xavier │ 41  │

julia> dt2 = DataTable(name = [:bob, :xavier, :apple], age = [25, 41, 2])
3×2 DataTables.DataTable
│ Row │ name   │ age │
├─────┼────────┼─────┤
│ 1   │ bob    │ 25  │
│ 2   │ xavier │ 41  │
│ 3   │ apple  │ 2   │

julia> dt3 = join(dt1, dt2, kind=:inner, on=:name)
2×3 DataTables.DataTable
│ Row │ name   │ age │ age_1 │
├─────┼────────┼─────┼───────┤
│ 1   │ bob    │ 2525    │
│ 2   │ xavier │ 4141    │

julia> eltypes(dt3)
3-element Array{Type,1}:
 Symbol
 Int64
 Nullable{Int64}

This comment has been minimized.

Copy link
@cjprybol

cjprybol Mar 15, 2017

Author Contributor

We could say if kind == :outer then always use similar_nullable?

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Yes, that's what I meant. Forget the length check: either always return a nullable array, or never.

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Though we also need to do the same for :left and :right, right?

This comment has been minimized.

Copy link
@cjprybol

cjprybol Mar 15, 2017

Author Contributor

Yea, that's what I saw when I looked into how those joins work. Right now it defaults to similar_nullable for all but :inner (which means :left, :right, and :outer in this case)

https://github.com/JuliaData/DataTables.jl/pull/30/files#diff-b1b6de17647445a5c08f65fd851680caR77

for col in columns(dtr_noon)]
right_dt = DataTable(rightcols, names(dtr_noon))
# merge left and right parts of the joined table
Expand Down
7 changes: 6 additions & 1 deletion src/abstractdatatable/reshape.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,14 +204,19 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
end
payload = DataTable(Any[NullableVector{T}(Nrow) for i in 1:Ncol],
map(Symbol, levels(keycol)))
nowarning = true
for k in 1:nrow(dt)
j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
i = Int(CategoricalArrays.order(refkeycol.pool)[refkeycol.refs[k]])
if i > 0 && j > 0
if nowarning && !isnull(payload[j][i])
warn("Duplicate entries in unstack.")
nowarning = false
end
payload[j][i] = valuecol[k]
end
end
denullify!(insert!(payload, 1, levels(refkeycol), _names(dt)[rowkey]))
denullify!(insert!(payload, 1, NullableArray(levels(refkeycol)), _names(dt)[rowkey]))
end
unstack(dt::AbstractDataTable, rowkey, colkey, value) =
unstack(dt, index(dt)[rowkey], index(dt)[colkey], index(dt)[value])
Expand Down
84 changes: 55 additions & 29 deletions src/datatable/datatable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -77,25 +77,42 @@ type DataTable <: AbstractDataTable
if length(columns) == length(colindex) == 0
return new(Vector{Any}(0), Index())
elseif length(columns) != length(colindex)
throw(DimensionMismatch("Number of columns and column names are different"))
throw(DimensionMismatch("Number of columns ($(length(columns))) and column names ($(length(colindex))) are not equal"))

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

"and of column names" would be even better (the original message was slightly ambiguous).

end
# do we allow people assigning arrays to columns now?
# make sure that doesn't work
# can use !get(size(c, 2), 0)
lengths = length.(columns)

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

length is not going to work to detect scalars: length("ab") == 2 and length(:a) throws an error. So you need to check whether isa(col, AbstractVector) before calling length, and return 1 if not.

minlen, maxlen = extrema(lengths)
if minlen == 0 && maxlen == 0
return new(columns, colindex)
elseif (minlen == 0 && maxlen > 0) || any(x -> x != 0, mod(maxlen, lengths))
throw(DimensionMismatch("Incompatible lengths of arguments"))
else
for i in 1:length(columns)
if isa(columns[i], Range)
columns[i] = collect(columns[i])
elseif minlen != maxlen
# recycle scalars
if minlen == 1 && maxlen > 1
indices = find(lengths .== minlen)
for i in indices

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Even simpler:

for i in 1:length(columns)
    typeof(columns[i]) <: AbstractArray && continue
    ...
end
if !(typeof(columns[i]) <: AbstractArray)
columns[i] = fill(columns[i], maxlen)
lengths[i] = maxlen
end
end
repeats = div(maxlen, length(columns[i]))
if repeats == 1 && !(typeof(columns[i]) <: AbstractVector)
columns[i] = [columns[i]]
elseif repeats !== 1
columns[i] = isa(columns[i], Array) ? repeat(columns[i], outer=repeats) : fill(columns[i], repeats)
end
uniques = unique(lengths)
if length(uniques) != 1
estring = Vector{String}(length(uniques))
strnames = string.(names(colindex))
for (i,u) in enumerate(uniques)

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Why not just use a comprehension?

indices = find(lengths .== u)
estring[i] = "column length ($(lengths[1])) for column(s) ($(join(strnames[indices], ", ")))"
end
throw(DimensionMismatch(join(estring, " is incompatible with ")))
end
end
for (i,c) in enumerate(columns)
if isa(c, Range)
columns[i] = collect(c)
elseif !isa(c, AbstractVector)
columns[i] = size(c, 2) > 1 ? reshape(c, length(c)) : [c]

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Double space. No need to check size since we should only have vectors or arrays at this point: so just call reshape.

end
end
return new(columns, colindex)
Expand All @@ -106,14 +123,18 @@ function DataTable(; kwargs...)
if length(kwargs) == 0
return DataTable(Any[], Index())
end
columns = Any[v for (k,v) in kwargs]
colindex = DataTables.Index([k for (k,v) in kwargs])
DataTable(columns, colindex)
colnames = Vector{Symbol}(length(kwargs))
columns = Vector{Any}(length(kwargs))
for (i,(k,v)) in enumerate(kwargs)
colnames[i] = Symbol(k)
columns[i] = v
end
DataTable(columns, Index(colnames))
end

function DataTable(columns::AbstractVector,
cnames::AbstractVector{Symbol} = gennames(length(columns)))
return DataTable(convert(Vector{Any}, columns), Index(convert(Vector{Symbol}, cnames)))

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Why change this?

This comment has been minimized.

Copy link
@cjprybol

cjprybol Mar 15, 2017

Author Contributor

gennames always returns a Vector{Symbol}

res = Array{Symbol}(n)

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

But couldn't a user want to call this constructor directly?

This comment has been minimized.

Copy link
@cjprybol

cjprybol Mar 15, 2017

Author Contributor

Yes, I hadn't thought of that as being problematic. All of the other constructors have cnames as either cnames::Vector or cnames::Vector{Symbol}. This PR currently changes all instances of cnames::Vector to being cnames::Vector{Symbol}. This constructor stood out as being particularly forgiving. We could change all of them to cnames::AbstractVector{Symbol} with a call to convert(Vector{Symbol}, cnames)?

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

I'd just leave this alone for now. One PR = one change set.

cnames::Vector{Symbol} = gennames(length(columns)))
return DataTable(convert(Vector{Any}, columns), Index(cnames))
end


Expand All @@ -128,37 +149,40 @@ function DataTable(t::Type, nrows::Integer, ncols::Integer)
end

# Initialize an empty DataTable with specific eltypes and names
function DataTable(column_eltypes::Vector, cnames::Vector, nrows::Integer)
function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol}, nrows::Integer)

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Use AbstractVector{T} with T<:Type instead.

p = length(column_eltypes)
columns = Vector{Any}(p)
for j in 1:p
columns[j] = Vector{column_eltypes[j]}(nrows)
T = column_eltypes[j]
columns[j] = T <: Nullable ? NullableArray{eltype(T)}(nrows) : Vector{T}(nrows)
end
return DataTable(columns, Index(cnames))
end
# Initialize an empty DataTable with specific eltypes and names
# and whether a nominal array should be created
function DataTable(column_eltypes::Vector, cnames::Vector,
function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol},
nominal::Vector{Bool}, nrows::Integer)
p = length(column_eltypes)
columns = Vector{Any}(p)
for j in 1:p
if nominal[j]
columns[j] = CategoricalVector{column_eltypes[j]}(nrows)
else
columns[j] = Vector{column_eltypes[j]}(nrows)
end
T = column_eltypes[j]
if nominal[j]
columns[j] = T <: Nullable ? NullableCategoricalArray{T}(nrows) : CategoricalVector{T}(nrows)
else
columns[j] = T <: Nullable ? NullableArray{T}(nrows) : Vector{T}(nrows)
end
end
return DataTable(columns, Index(cnames))
end

# Initialize an empty DataTable with specific eltypes
function DataTable(column_eltypes::Vector, nrows::Integer)
function DataTable(column_eltypes::Vector{DataType}, nrows::Integer)
p = length(column_eltypes)
columns = Vector{Any}(p)
cnames = gennames(p)
for j in 1:p
columns[j] = Vector{column_eltypes[j]}(nrows)
T = column_eltypes[j]
columns[j] = T <: Nullable ? NullableArray{T}(nrows) : Vector{T}(nrows)
end
return DataTable(columns, Index(cnames))
end
Expand Down Expand Up @@ -806,8 +830,10 @@ function Base.convert(::Type{DataTable}, A::Matrix)
end

function Base.convert(::Type{DataTable}, d::Associative)
colnames = collect(keys(d))
isa(d, Dict) && sort!(colnames)
colnames = keys(d)
if isa(d, Dict)
colnames = sort!(collect(colnames))
end
colindex = Index([Symbol(k) for k in colnames])
columns = Any[d[c] for c in colnames]
DataTable(columns, colindex)
Expand Down
27 changes: 11 additions & 16 deletions test/cat.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@ module TestCat
dt[1:2, 1:2] = [3,2]
dt[[true,false,false,true], 2:3] = [2,3]

vcat([])
vcat(null_dt)
vcat(null_dt, null_dt)
vcat(null_dt, dt)
vcat(dt, null_dt)
vcat(dt, dt)
vcat(dt, dt, dt)
@test vcat(DataTable()) == DataTable()
@test vcat(null_dt) == DataTable()
@test vcat(null_dt, null_dt) == DataTable()
@test vcat(null_dt, dt) == dt
@test vcat(dt, null_dt) == dt
@test all(map((x,y) -> x <: y, eltypes(vcat(dt, dt)), (Float64, Float64, Int)))

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Just all(eltypes(vcat(dt, dt)) .== (Float64, Float64, Int)).

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Or even eltypes(vcat(dt, dt)) == [Float64, Float64, Int].

@test size(vcat(dt, dt)) == (size(dt,1)*2, size(dt,2))
@test all(map((x,y) -> x <: y, eltypes(vcat(dt, dt, dt)), (Float64, Float64, Int)))
@test size(vcat(dt, dt, dt)) == (size(dt,1)*3, size(dt,2))

alt_dt = deepcopy(dt)
vcat(dt, alt_dt)
Expand All @@ -94,21 +94,16 @@ module TestCat
@test isequal(dtr, [dt4; dt4])

# Eltype promotion
# Fails on Julia 0.4 since promote_type(Nullable{Int}, Nullable{Float64}) gives Nullable{T}
if VERSION >= v"0.5.0-dev"
@test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Float64]
@test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == [Nullable{Float64}]
else
@test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Any]
@test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == [Nullable{Any}]
end
@test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Float64]
@test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == [Nullable{Float64}]

# Minimal container type promotion
dta = DataTable(a = CategoricalArray([1, 2, 2]))
dtb = DataTable(a = CategoricalArray([2, 3, 4]))
dtc = DataTable(a = NullableArray([2, 3, 4]))
dtd = DataTable(Any[2:4], [:a])
dtab = vcat(dta, dtb)
dtac = vcat(nullify(dta), dtc)

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Should check the result.

@test isequal(dtab[:a], [1, 2, 2, 2, 3, 4])
@test isa(dtab[:a], CategoricalVector{Int})
dc = vcat(dtd, dtc)
Expand Down
20 changes: 10 additions & 10 deletions test/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ module TestConstructors

@test isequal(dt, DataTable(Any[NullableCategoricalVector(zeros(3)),
NullableCategoricalVector(ones(3))]))
@test !isequal(dt, DataTable(x1 = [0.0, 0.0, 0.0],
x2 = [1.0, 1.0, 1.0]))

dt2 = convert(DataTable, [0.0 1.0;
0.0 1.0;
Expand All @@ -28,19 +26,21 @@ module TestConstructors
@test isequal(dt[:x1], NullableArray(dt2[:x1]))
@test isequal(dt[:x2], NullableArray(dt2[:x2]))

@test isequal(dt, DataTable(x1 = NullableCategoricalVector([0.0, 0.0, 0.0]),
x2 = NullableCategoricalVector([1.0, 1.0, 1.0])))
@test isequal(dt, DataTable(x1 = NullableCategoricalVector([0.0, 0.0, 0.0]),
x2 = NullableCategoricalVector([1.0, 1.0, 1.0]),
@test isequal(dt, DataTable(x1 = NullableArray([0.0, 0.0, 0.0]),
x2 = NullableArray([1.0, 1.0, 1.0])))
@test isequal(dt, DataTable(x1 = NullableArray([0.0, 0.0, 0.0]),
x2 = NullableArray([1.0, 1.0, 1.0]),
x3 = [2.0, 2.0, 2.0])[[:x1, :x2]])

dt = DataTable(Int, 2, 2)
@test size(dt) == (2, 2)
@test eltypes(dt) == [Int, Int]

dt = DataTable([Int, Float64], [:x1, :x2], 2)
dt = DataTable([Nullable{Int}, Nullable{Float64}], [:x1, :x2], 2)
@test size(dt) == (2, 2)
@test eltypes(dt) == [Int, Float64]
@test eltypes(dt) == [Nullable{Int}, Nullable{Float64}]

@test isequal(dt, DataTable([Nullable{Int}, Nullable{Float64}], 2))

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Could also check that the columns contain only nulls.


@test_throws BoundsError SubDataTable(DataTable(A=1), 0)
@test_throws BoundsError SubDataTable(DataTable(A=1), 0)
Expand All @@ -51,12 +51,12 @@ module TestConstructors
@test DataTable(a=1, b=1:2) == DataTable(a=[1,1], b=[1,2])

@testset "associative" begin
dt = DataTable(Dict(k => v for (k,v) in zip([:A, :B], [1:3, 4:6])))
dt = DataTable(Dict(:A => 1:3, :B => 4:6))
@test dt == DataTable(A = 1:3, B = 4:6)
@test all(e -> e <: Int, eltypes(dt))
end

@testset "recyclers" begin
@test DataTable([collect(1:10), collect(1:20)], [:x, :y]) == DataTable(x = vcat(1:10, 1:10), y = 1:20)
@test DataTable(a = 1:5, b = 1) == DataTable(a = collect(1:5), b = fill(1, 5))
@test DataTable(a = 1, b = 1:5) == DataTable(a = fill(1, 5), b = collect(1:5))
end
Expand Down
2 changes: 1 addition & 1 deletion test/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ module TestConversions
@test isequal(dt[:b], b)
@test isequal(dt[:c], c)

a = [1.0]
a = 1.0
di = Dict("a"=>a, "b"=>b, "c"=>c)
@test convert(DataTable,di)[:a] == [1.0, 1.0]

Expand Down

0 comments on commit 2c95f13

Please sign in to comment.