JuliaData · cjprybol · Mar 10, 2017 · Mar 13, 2017 · Mar 13, 2017 · Mar 13, 2017
diff --git a/docs/src/lib/manipulation.md b/docs/src/lib/manipulation.md
@@ -20,6 +20,4 @@ join
 melt
 stack
 unstack
-stackdt
-meltdt
 ```
diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md
@@ -53,29 +53,6 @@ If the remaining columns are unique, you can skip the id variable and use:
 widedt = unstack(longdt, :variable, :value)
 ```
 
-`stackdt` and `meltdt` are two additional functions that work like `stack` and `melt`, but they provide a view into the original wide DataTable. Here is an example:
-
-```julia
-d = stackdt(iris)
-```
-
-This saves memory. To create the view, several AbstractVectors are defined:
-
-`:variable` column -- `EachRepeatedVector`  
-This repeats the variables N times where N is the number of rows of the original AbstractDataTable.
-
-`:value` column -- `StackedVector`  
-This is provides a view of the original columns stacked together.
-
-Id columns -- `RepeatedVector`  
-This repeats the original columns N times where N is the number of columns stacked.
-
-For more details on the storage representation, see:
-
-```julia
-dump(stackdt(iris))
-```
-
 None of these reshaping functions perform any aggregation. To do aggregation, use the split-apply-combine functions in combination with reshaping. Here is an example:
 
 ```julia

diff --git a/src/DataTables.jl b/src/DataTables.jl
@@ -47,6 +47,8 @@ export @~,
        combine,
        completecases,
        deleterows!,
+       denullify!,
+       denullify,
        describe,
        dropnull,
        dropnull!,
@@ -55,19 +57,19 @@ export @~,
        eltypes,
        groupby,
        melt,
-       meltdt,
        names!,
        ncol,
        nonunique,
        nrow,
        nullable!,
+       nullify!,
+       nullify,
        order,
        printtable,
        rename!,
        rename,
        showcols,
        stack,
-       stackdt,
        unique!,
        unstack,
        head,

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
@@ -31,6 +31,10 @@ The following are normally implemented for AbstractDataTables:
 * [`nonunique`](@ref) : indexes of duplicate rows
 * [`unique!`](@ref) : remove duplicate rows
 * `similar` : a DataTable with similar columns as `d`
+* `denullify` : unwrap `Nullable` columns that contain no null values
+* `denullify!` : unwrap `Nullable` columns that contain no null values, in-place
+* `nullify` : convert all columns to NullableArrays
+* `nullify!` : convert all columns to NullableArrays in-place
 
 **Indexing**
 
@@ -706,83 +710,50 @@ Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable) = hcat!(dt[:, :], dt2)
 Base.hcat(dt::AbstractDataTable, x, y...) = hcat!(hcat(dt, x), y...)
 Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable, dtn::AbstractDataTable...) = hcat!(hcat(dt1, dt2), dtn...)
 
-# vcat only accepts DataTables. Finds union of columns, maintaining order
-# of first dt. Missing data become null values.
-
-Base.vcat(dt::AbstractDataTable) = dt
+"""
+    vcat(dts::AbstractDataTable...)
 
-Base.vcat(dts::AbstractDataTable...) = vcat(AbstractDataTable[dts...])
+Vertically concatenate `AbstractDataTable`s that have identical column names in the same order.
 
-function Base.vcat{T<:AbstractDataTable}(dts::Vector{T})
+```julia
+julia> dt1 = DataTable(A=1:3, B=1:3); dt2 = DataTable(A=4:6, B=4:6); dt3 = DataTable(A=7:9, B=7:9, C=7:9);
+
+julia> vcat(dt1, dt2)
+6×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+│ 4   │ 4 │ 4 │
+│ 5   │ 5 │ 5 │
+│ 6   │ 6 │ 6 │
+
+julia> vcat(dt1, dt2, dt3)
+ERROR: ArgumentError: columns (A, B) of input(s) (1, 2) != columns (A, B, C) of input(s) (3)
+```
+"""
+Base.vcat(dt::AbstractDataTable) = dt
+function Base.vcat(dts::AbstractDataTable...)
     isempty(dts) && return DataTable()
-    coltyps, colnams, similars = _colinfo(dts)
-
-    res = DataTable()
-    Nrow = sum(nrow, dts)
-    for j in 1:length(colnams)
-        colnam = colnams[j]
-        col = similar(similars[j], coltyps[j], Nrow)
-
-        i = 1
-        for dt in dts
-            if haskey(dt, colnam)
-                copy!(col, i, dt[colnam])
-            end
-            i += size(dt, 1)
-        end
-
-        res[colnam] = col
-    end
-    res
-end
-
-_isnullable{T}(::AbstractArray{T}) = T <: Nullable
-const EMPTY_DATA = NullableArray(Void, 0)
-
-function _colinfo{T<:AbstractDataTable}(dts::Vector{T})
-    dt1 = dts[1]
-    colindex = copy(index(dt1))
-    coltyps = eltypes(dt1)
-    similars = collect(columns(dt1))
-    nonnull_ct = Int[_isnullable(c) for c in columns(dt1)]
-
-    for i in 2:length(dts)
-        dt = dts[i]
-        for j in 1:size(dt, 2)
-            col = dt[j]
-            cn, ct = _names(dt)[j], eltype(col)
-            if haskey(colindex, cn)
-                idx = colindex[cn]
-
-                oldtyp = coltyps[idx]
-                if !(ct <: oldtyp)
-                    coltyps[idx] = promote_type(oldtyp, ct)
-                    # Needed on Julia 0.4 since e.g.
-                    # promote_type(Nullable{Int}, Nullable{Float64}) gives Nullable{T},
-                    # which is not a usable type: fall back to Nullable{Any}
-                    if VERSION < v"0.5.0-dev" &&
-                       coltyps[idx] <: Nullable && !isa(coltyps[idx].types[2], DataType)
-                        coltyps[idx] = Nullable{Any}
-                    end
-                end
-                nonnull_ct[idx] += !_isnullable(col)
-            else # new column
-                push!(colindex, cn)
-                push!(coltyps, ct)
-                push!(similars, col)
-                push!(nonnull_ct, !_isnullable(col))
-            end
-        end
-    end
-
-    for j in 1:length(colindex)
-        if nonnull_ct[j] < length(dts) && !_isnullable(similars[j])
-            similars[j] = EMPTY_DATA
+    allheaders = map(names, dts)
+    # skip DataTables that have no columns (their header is empty)
+    notempty = find(x -> length(x) > 0, allheaders)
+    uniqueheaders = unique(allheaders[notempty])
+    if length(uniqueheaders) == 0
+        return DataTable()
+    elseif length(uniqueheaders) > 1
+        estring = Vector{String}(length(uniqueheaders))
+        for (i,u) in enumerate(uniqueheaders)
+            indices = string.(find(x -> x == u, allheaders))
+            estring[i] = "columns ($(join(u, ", "))) of input(s) ($(join(indices, ", ")))"
         end
+        throw(ArgumentError(join(estring, " != ")))
+    else
+        header = uniqueheaders[1]
+        dts_to_vcat = dts[notempty]
+        return DataTable(Any[vcat(map(dt -> dt[col], dts_to_vcat)...) for col in header], header)
     end
-    colnams = _names(colindex)
-
-    coltyps, colnams, similars
 end
 
 ##############################################################################
@@ -801,6 +772,168 @@ function Base.hash(dt::AbstractDataTable)
     return @compat UInt(h)
 end
 
+"""
+    denullify!(dt::AbstractDataTable)
+
+Convert columns with a `Nullable` element type without any null values
+to a non-`Nullable` equivalent array type. The table `dt` is modified in place.
+
+# Examples
+
+```jldoctest
+julia> dt = DataTable(A = NullableArray(1:3), B = [Nullable(i) for i=1:3])
+3×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+
+julia> eltypes(denullify!(dt))
+2-element Array{Type,1}:
+ Int64
+ Int64
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Int64
+ Int64
+```
+
+See also [`denullify`](@ref) and [`nullify!`](@ref).
+"""
+function denullify!(dt::AbstractDataTable)
+    for i in 1:size(dt,2)
+        if !anynull(dt[i])
+            dt[i] = dropnull!(dt[i])
+        end
+    end
+    dt
+end
+
+"""
+    denullify(dt::AbstractDataTable)
+
+Return a copy of `dt` where columns with a `Nullable` element type without any
+null values have been converted to a non-`Nullable` equivalent array type.
+
+# Examples
+
+```jldoctest
+julia> dt = DataTable(A = NullableArray(1:3), B = [Nullable(i) for i=1:3])
+3×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+
+julia> eltypes(denullify(dt))
+2-element Array{Type,1}:
+ Int64
+ Int64
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+```
+
+See also [`denullify!`](@ref) & [`nullify`](@ref).
+"""
+denullify(dt::AbstractDataTable) = denullify!(deepcopy(dt))
+
+"""
+    nullify!(dt::AbstractDataTable)
+
+Convert all columns of `dt` to nullable arrays. The table `dt` is modified in place.
+
+# Examples
+
+```jldoctest
+julia> dt = DataTable(A = 1:3, B = 1:3)
+3×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Int64
+ Int64
+
+julia> eltypes(nullify!(dt))
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+```
+
+See also [`nullify`](@ref) & [`denullify!`](@ref).
+"""
+function nullify!(dt::AbstractDataTable)
+    for i in 1:size(dt,2)
+        dt[i] = nullify(dt[i])
+    end
+    dt
+end
+
+nullify(x::AbstractArray) = convert(NullableArray, x)
+nullify(x::AbstractCategoricalArray) = convert(NullableCategoricalArray, x)
+
+"""
+    nullify(dt::AbstractDataTable)
+
+Return a copy of `dt` with all columns converted to nullable arrays.
+
+# Examples
+
+```jldoctest
+julia> dt = DataTable(A = 1:3, B = 1:3)
+3×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Int64
+ Int64
+
+julia> eltypes(nullify(dt))
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Int64
+ Int64
+```
+
+See also [`nullify!`](@ref) & [`denullify`](@ref).
+"""
+function nullify(dt::AbstractDataTable)
+    nullify!(deepcopy(dt))
+end
 
 ## Documentation for methods defined elsewhere