Skip to content
This repository has been archived by the owner on May 5, 2019. It is now read-only.

Commit

Permalink
lots of edits
Browse files Browse the repository at this point in the history
clarify error messages, fix spacing, address case of length(::Symbol)
and length(::String) not giving the desired output (Vector length), and
unify constructors in accepting AbstractVector{Symbol} for colnames
  • Loading branch information
cjprybol committed Mar 15, 2017
1 parent 7462612 commit 9b65533
Showing 1 changed file with 30 additions and 40 deletions.
70 changes: 30 additions & 40 deletions src/datatable/datatable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -77,39 +77,33 @@ type DataTable <: AbstractDataTable
if length(columns) == length(colindex) == 0
return new(Vector{Any}(0), Index())
elseif length(columns) != length(colindex)
throw(DimensionMismatch("Number of columns ($(length(columns))) and column names ($(length(colindex))) are not equal"))
throw(DimensionMismatch("Number of columns ($(length(columns))) and number of column names ($(length(colindex))) are not equal"))
end
lengths = length.(columns)
lengths = [isa(col, AbstractArray) ? length(col) : 1 for col in columns]
minlen, maxlen = extrema(lengths)
if minlen == 0 && maxlen == 0
return new(columns, colindex)
elseif minlen != maxlen
# recycle scalars
if minlen == 1 && maxlen > 1
indices = find(lengths .== minlen)
for i in indices
if !(typeof(columns[i]) <: AbstractVector)
columns[i] = fill(columns[i], maxlen)
lengths[i] = maxlen
end
end
for i in 1:length(columns)
typeof(columns[i]) <: AbstractArray && continue
columns[i] = fill(columns[i], maxlen)
lengths[i] = maxlen
end
uniques = unique(lengths)
if length(uniques) != 1
estring = Vector{String}(length(uniques))
uls = unique(lengths)
if length(uls) != 1
# estring = Vector{String}(length(uniques))

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Remove this?

strnames = string.(names(colindex))
for (i,u) in enumerate(uniques)
indices = find(lengths .== u)
estring[i] = "column length ($(uniques[i])) for column(s) ($(join(strnames[indices], ", ")))"
end
estring = ["column length ($(uls[i])) for column(s) ($(join(strnames[find(uls .== u)], ", ")))"
for (i,u) in enumerate(uls)]
throw(DimensionMismatch(join(estring, " is incompatible with ")))
end
end
for (i,c) in enumerate(columns)
if isa(c, Range)
columns[i] = collect(c)
elseif !isa(c, AbstractVector)
columns[i] = size(c, 2) > 1 ? throw(DimensionMismatch("columns must be 1-dimensional")) : [c]
columns[i] = size(c, 2) > 1 ? throw(DimensionMismatch("columns must be 1-dimensional")) : [c]

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

As I said, the size check shouldn't be there since we don't want to accept 1-column matrices either. Just always throw the error.

end
end
return new(columns, colindex)
Expand All @@ -120,21 +114,16 @@ function DataTable(; kwargs...)
if length(kwargs) == 0
return DataTable(Any[], Index())
end
colnames = Vector{Symbol}(length(kwargs))
columns = Vector{Any}(length(kwargs))
for (i,(k,v)) in enumerate(kwargs)
colnames[i] = Symbol(k)
columns[i] = v
end
colnames = [Symbol(k) for (k,v) in kwargs]
columns = Any[v for (k,v) in kwargs]
DataTable(columns, Index(colnames))
end

function DataTable(columns::AbstractVector,
cnames::Vector{Symbol} = gennames(length(columns)))
return DataTable(convert(Vector{Any}, columns), Index(cnames))
cnames::AbstractVector{Symbol} = gennames(length(columns)))
return DataTable(convert(Vector{Any}, columns), Index(convert(Vector{Symbol}, cnames)))
end


# Initialize empty DataTable objects of arbitrary size
function DataTable(t::Type, nrows::Integer, ncols::Integer)
columns = Vector{Any}(ncols)
Expand All @@ -146,40 +135,41 @@ function DataTable(t::Type, nrows::Integer, ncols::Integer)
end

# Initialize an empty DataTable with specific eltypes and names
function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol}, nrows::Integer)
function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol}, nrows::Integer)
p = length(column_eltypes)
columns = Vector{Any}(p)
for j in 1:p
T = column_eltypes[j]
columns[j] = T <: Nullable ? NullableArray{eltype(T)}(nrows) : Vector{T}(nrows)
colT = column_eltypes[j]

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

colT is misleading since this is the column element type.

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

I think a common abbreviation is elty.

columns[j] = colT <: Nullable ? NullableArray{eltype(colT)}(nrows) : Vector{colT}(nrows)
end
return DataTable(columns, Index(cnames))
return DataTable(columns, Index(convert(Vector{Symbol}, cnames)))
end

# Initialize an empty DataTable with specific eltypes and names
# and whether a nominal array should be created
function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol},
nominal::Vector{Bool}, nrows::Integer)
function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol},
nominal::Vector{Bool}, nrows::Integer)

This comment has been minimized.

Copy link
@nalimilan

nalimilan Mar 15, 2017

Member

Incorrect alignment.

p = length(column_eltypes)
columns = Vector{Any}(p)
for j in 1:p
T = column_eltypes[j]
colT = column_eltypes[j]
if nominal[j]
columns[j] = T <: Nullable ? NullableCategoricalArray{T}(nrows) : CategoricalVector{T}(nrows)
columns[j] = colT <: Nullable ? NullableCategoricalArray{colT}(nrows) : CategoricalVector{colT}(nrows)
else
columns[j] = T <: Nullable ? NullableArray{T}(nrows) : Vector{T}(nrows)
columns[j] = colT <: Nullable ? NullableArray{colT}(nrows) : Vector{colT}(nrows)
end
end
return DataTable(columns, Index(cnames))
return DataTable(columns, Index(convert(Vector{Symbol}, cnames)))
end

# Initialize an empty DataTable with specific eltypes
function DataTable(column_eltypes::Vector{DataType}, nrows::Integer)
function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, nrows::Integer)
p = length(column_eltypes)
columns = Vector{Any}(p)
cnames = gennames(p)
for j in 1:p
T = column_eltypes[j]
columns[j] = T <: Nullable ? NullableArray{T}(nrows) : Vector{T}(nrows)
colT = column_eltypes[j]
columns[j] = colT <: Nullable ? NullableArray{colT}(nrows) : Vector{colT}(nrows)
end
return DataTable(columns, Index(cnames))
end
Expand Down

0 comments on commit 9b65533

Please sign in to comment.