[BREAKING] Add PrettyTables.jl backend for printing DataFrames (#2429)

JuliaData · Nov 7, 2020 · 681de52 · 681de52
1 parent 55533d1
commit 681de52
Show file tree

Hide file tree

Showing 13 changed files with 827 additions and 579 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -50,6 +50,10 @@
 * `unstack` now produces row and column keys in the order of their first appearance
    and has two new keyword arguments `allowmissing` and `allowduplicates`
   ([#2494](https://github.com/JuliaData/DataFrames.jl/pull/2494))
+* [PrettyTables.jl](https://github.com/ronisbr/PrettyTables.jl) is now the
+  default back-end to print DataFrames to text/plain; the print option
+  `splitcols` was removed and the output format was changed
+  ([#2429](https://github.com/JuliaData/DataFrames.jl/pull/2429))
 
 ## New functionalities
 

diff --git a/Project.toml b/Project.toml
@@ -13,36 +13,38 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
+PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SortingAlgorithms = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
+Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 
-[extras]
-DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
-DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5"
-Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
-Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["DataStructures", "DataValues", "Dates", "Logging", "Random", "Test"]
-
 [compat]
-julia = "1"
 CategoricalArrays = "0.8.3"
 Compat = "3.17"
 DataAPI = "1.4"
 InvertedIndices = "1"
 IteratorInterfaceExtensions = "0.1.1, 1"
 Missings = "0.4.2"
 PooledArrays = "0.5"
+PrettyTables = "0.10"
 Reexport = "0.1, 0.2"
 SortingAlgorithms = "0.1, 0.2, 0.3"
-Tables = "1.1"
 TableTraits = "0.4, 1"
+Tables = "1.1"
+julia = "1"
+
+[extras]
+DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["DataStructures", "DataValues", "Dates", "Logging", "Random", "Test"]
diff --git a/src/DataFrames.jl b/src/DataFrames.jl
@@ -7,6 +7,7 @@ using Base.Sort, Base.Order, Base.Iterators
 using TableTraits, IteratorInterfaceExtensions
 import LinearAlgebra: norm
 using Markdown
+using PrettyTables
 
 import DataAPI,
        DataAPI.All,
@@ -53,6 +54,7 @@ export AbstractDataFrame,
        nrow,
        order,
        outerjoin,
+       PrettyTables,
        rename!,
        rename,
        repeat!,
@@ -111,6 +113,7 @@ include("groupeddataframe/callprocessing.jl")
 include("groupeddataframe/fastaggregates.jl")
 include("groupeddataframe/complextransforms.jl")
 
+include("abstractdataframe/prettytables.jl")
 include("abstractdataframe/show.jl")
 include("groupeddataframe/show.jl")
 include("dataframerow/show.jl")

diff --git a/src/abstractdataframe/io.jl b/src/abstractdataframe/io.jl
@@ -47,15 +47,8 @@ Base.show(io::IO, mime::MIME"text/csv", df::AbstractDataFrame) =
     printtable(io, df, header = true, separator = ',')
 Base.show(io::IO, mime::MIME"text/tab-separated-values", df::AbstractDataFrame) =
     printtable(io, df, header = true, separator = '\t')
-Base.show(io::IO, mime::MIME"text/plain", df::AbstractDataFrame;
-          allrows::Bool = !get(io, :limit, false),
-          allcols::Bool = !get(io, :limit, false),
-          splitcols = get(io, :limit, false),
-          rowlabel::Symbol = :Row,
-          summary::Bool = true,
-          eltypes::Bool = true) =
-    show(io, df, allrows=allrows, allcols=allcols,
-         splitcols=splitcols, rowlabel=rowlabel, summary=summary, eltypes=eltypes)
+Base.show(io::IO, mime::MIME"text/plain", df::AbstractDataFrame; kwargs...) =
+    show(io, df; kwargs...)
 
 ##############################################################################
 #

diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl
@@ -249,74 +249,76 @@ Base.names(itr::Union{DataFrameRows, DataFrameColumns}, cols) = names(parent(itr
 function Base.show(io::IO, dfrs::DataFrameRows;
                    allrows::Bool = !get(io, :limit, false),
                    allcols::Bool = !get(io, :limit, false),
-                   splitcols = get(io, :limit, false),
                    rowlabel::Symbol = :Row,
                    summary::Bool = true,
                    eltypes::Bool = true,
-                   truncate::Int = 32)
+                   truncate::Int = 32,
+                   kwargs...)
     df = parent(dfrs)
-    summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameRows")
-    _show(io, df, allrows=allrows, allcols=allcols, splitcols=splitcols,
-          rowlabel=rowlabel, summary=false, eltypes=eltypes, truncstring=truncate)
+    title = summary ? "$(nrow(df))×$(ncol(df)) DataFrameRows" : ""
+    _show(io, df; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
+          summary=false, eltypes=eltypes, truncate=truncate, title=title,
+          kwargs...)
 end
 
 Base.show(io::IO, mime::MIME"text/plain", dfrs::DataFrameRows;
           allrows::Bool = !get(io, :limit, false),
           allcols::Bool = !get(io, :limit, false),
-          splitcols = get(io, :limit, false),
           rowlabel::Symbol = :Row,
           summary::Bool = true,
           eltypes::Bool = true,
-          truncate::Int = 32) =
-    show(io, dfrs, allrows=allrows, allcols=allcols, splitcols=splitcols,
-         rowlabel=rowlabel, summary=summary, eltypes=eltypes, truncate=truncate)
+          truncate::Int = 32,
+          kwargs...) =
+    show(io, dfrs; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
+         summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)
 
 Base.show(dfrs::DataFrameRows;
           allrows::Bool = !get(stdout, :limit, true),
           allcols::Bool = !get(stdout, :limit, true),
-          splitcols = get(stdout, :limit, true),
           rowlabel::Symbol = :Row,
           summary::Bool = true,
           eltypes::Bool = true,
-          truncate::Int = 32) =
-    show(stdout, dfrs, allrows=allrows, allcols=allcols, splitcols=splitcols,
-         rowlabel=rowlabel, summary=summary, eltypes=eltypes, truncate=truncate)
+          truncate::Int = 32,
+          kwargs...) =
+    show(stdout, dfrs; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
+         summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)
 
 function Base.show(io::IO, dfcs::DataFrameColumns;
                    allrows::Bool = !get(io, :limit, false),
                    allcols::Bool = !get(io, :limit, false),
-                   splitcols = get(io, :limit, false),
                    rowlabel::Symbol = :Row,
                    summary::Bool = true,
                    eltypes::Bool = true,
-                   truncate::Int = 32)
+                   truncate::Int = 32,
+                   kwargs...)
     df = parent(dfcs)
-    summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameColumns")
-    _show(io, parent(dfcs), allrows=allrows, allcols=allcols, splitcols=splitcols,
-          rowlabel=rowlabel, summary=false, eltypes=eltypes, truncstring=truncate)
+    title = summary ? "$(nrow(df))×$(ncol(df)) DataFrameColumns" : ""
+    _show(io, parent(dfcs); allrows=allrows, allcols=allcols, rowlabel=rowlabel,
+          summary=false, eltypes=eltypes, truncate=truncate, title=title,
+          kwargs...)
 end
 
 Base.show(io::IO, mime::MIME"text/plain", dfcs::DataFrameColumns;
           allrows::Bool = !get(io, :limit, false),
           allcols::Bool = !get(io, :limit, false),
-          splitcols = get(io, :limit, false),
           rowlabel::Symbol = :Row,
           summary::Bool = true,
           eltypes::Bool = true,
-          truncate::Int = 32) =
-    show(io, dfcs, allrows=allrows, allcols=allcols, splitcols=splitcols,
-         rowlabel=rowlabel, summary=summary, eltypes=eltypes, truncate=truncate)
+          truncate::Int = 32,
+          kwargs...) =
+    show(io, dfcs; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
+         summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)
 
 Base.show(dfcs::DataFrameColumns;
           allrows::Bool = !get(stdout, :limit, true),
           allcols::Bool = !get(stdout, :limit, true),
-          splitcols = get(stdout, :limit, true),
           rowlabel::Symbol = :Row,
           summary::Bool = true,
           eltypes::Bool = true,
-          truncate::Int = 32) =
-    show(stdout, dfcs, allrows=allrows, allcols=allcols, splitcols=splitcols,
-         rowlabel=rowlabel, summary=summary, eltypes=eltypes, truncate=truncate)
+          truncate::Int = 32,
+          kwargs...) =
+    show(stdout, dfcs; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
+         summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)
 
 """
     mapcols(f::Union{Function,Type}, df::AbstractDataFrame)

diff --git a/src/abstractdataframe/prettytables.jl b/src/abstractdataframe/prettytables.jl
@@ -0,0 +1,92 @@
+##############################################################################
+##
+## Functions related to the interface with PrettyTables.jl.
+##
+##############################################################################
+
+# Default DataFrames highlighter for text backend.
+#
+# This highlighter changes the text color to gray in cells with `nothing`,
+# `missing`, `#undef`, and types related to DataFrames.jl.
+function _pretty_tables_highlighter_func(data, i::Integer, j::Integer)
+    # Returns `true` when cell `(i, j)` of `data` must be highlighted.
+    try
+        cell = data[i, j]
+        return ismissing(cell) ||
+            cell === nothing ||
+            cell isa Union{AbstractDataFrame, GroupedDataFrame,
+                           DataFrameRow, DataFrameRows,
+                           DataFrameColumns}
+    catch e
+        # Reading an unassigned (`#undef`) cell throws `UndefRefError`:
+        # highlight it. `rethrow()` propagates anything else untouched.
+        e isa UndefRefError || rethrow()
+        return true
+    end
+end
+
+const _PRETTY_TABLES_HIGHLIGHTER = Highlighter(_pretty_tables_highlighter_func,
+                                               Crayon(foreground = :dark_gray))
+
+# Default DataFrames formatter for text backend.
+#
+# This formatter changes how the following types are presented when rendering
+# the data frame:
+#     - missing;
+#     - nothing;
+#     - Cells with types related to DataFrames.jl.
+
+function _pretty_tables_general_formatter(v, i::Integer, j::Integer)
+    # Cell formatter: nested DataFrames.jl objects are replaced by their
+    # summary, `missing` by "missing" and `nothing` by an empty string;
+    # every other value is returned unchanged.
+    if v isa Union{AbstractDataFrame, GroupedDataFrame, DataFrameRow,
+                   DataFrameRows, DataFrameColumns}
+        # `print`/`show` must be avoided here: they would render the nested
+        # object through `_pretty_table` again and overflow the stack.
+        return sprint(summary, v)
+    end
+
+    ismissing(v) && return "missing"
+    v === nothing && return ""
+
+    return v
+end
+
+# Formatter to align the floating points as in Julia array printing.
+#
+# - `float_cols` contains the IDs of the columns that must be formatted.
+# - `indices` is a vector of vectors containing the indices of each element
+#   in the data frame.
+# - `padding` is a vector of vectors containing the padding of each element for
+#   each row.
+# - `compact_printing` must be a boolean indicating if we should enable the
+#   `:compact` option of `io` when converting the number to string.
+
+function _pretty_tables_float_formatter(v, i::Integer, j::Integer,
+                                        float_cols::Vector{Int},
+                                        indices::Vector{Vector{Int}},
+                                        padding::Vector{Vector{Int}},
+                                        compact_printing::Bool)
+    # This formatter is applied to every cell of the table, so any cell that
+    # is not registered for alignment is handed back exactly as received.
+    isempty(float_cols) && return v
+
+    # Position of column `j` among the columns that contain only floats;
+    # cells of any other column are left untouched.
+    col_pos = findfirst(==(j), float_cols)
+    col_pos === nothing && return v
+
+    # Position of row `i` among the rows recorded for that column; rows
+    # that were not recorded are left untouched as well.
+    row_pos = findfirst(==(i), indices[col_pos])
+    row_pos === nothing && return v
+
+    # Number of spaces to prepend, looked up before rendering the value.
+    pad = padding[col_pos][row_pos]
+
+    # Render the number with the requested `:compact` setting and left-pad
+    # it with `pad` spaces.
+    rendered = sprint(print, v, context = :compact => compact_printing)
+    return " "^pad * rendered
+end