Skip to content

Commit

Permalink
[BREAKING] Add PrettyTables.jl backend for printing DataFrames (#2429)
Browse files Browse the repository at this point in the history
  • Loading branch information
ronisbr committed Nov 7, 2020
1 parent 55533d1 commit 681de52
Show file tree
Hide file tree
Showing 13 changed files with 827 additions and 579 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Expand Up @@ -50,6 +50,10 @@
* `unstack` now produces row and column keys in the order of their first appearance
and has two new keyword arguments `allowmissing` and `allowduplicates`
([#2494](https://github.com/JuliaData/DataFrames.jl/pull/2494))
* [PrettyTables.jl](https://github.com/ronisbr/PrettyTables.jl) is now the
default back-end to print DataFrames to text/plain; the print option
`splitcols` was removed and the output format was changed
([#2429](https://github.com/JuliaData/DataFrames.jl/pull/2429))

## New functionalities

Expand Down
32 changes: 17 additions & 15 deletions Project.toml
Expand Up @@ -13,36 +13,38 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SortingAlgorithms = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[extras]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["DataStructures", "DataValues", "Dates", "Logging", "Random", "Test"]

[compat]
julia = "1"
CategoricalArrays = "0.8.3"
Compat = "3.17"
DataAPI = "1.4"
InvertedIndices = "1"
IteratorInterfaceExtensions = "0.1.1, 1"
Missings = "0.4.2"
PooledArrays = "0.5"
PrettyTables = "0.10"
Reexport = "0.1, 0.2"
SortingAlgorithms = "0.1, 0.2, 0.3"
Tables = "1.1"
TableTraits = "0.4, 1"
Tables = "1.1"
julia = "1"

[extras]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["DataStructures", "DataValues", "Dates", "Logging", "Random", "Test"]
3 changes: 3 additions & 0 deletions src/DataFrames.jl
Expand Up @@ -7,6 +7,7 @@ using Base.Sort, Base.Order, Base.Iterators
using TableTraits, IteratorInterfaceExtensions
import LinearAlgebra: norm
using Markdown
using PrettyTables

import DataAPI,
DataAPI.All,
Expand Down Expand Up @@ -53,6 +54,7 @@ export AbstractDataFrame,
nrow,
order,
outerjoin,
PrettyTables,
rename!,
rename,
repeat!,
Expand Down Expand Up @@ -111,6 +113,7 @@ include("groupeddataframe/callprocessing.jl")
include("groupeddataframe/fastaggregates.jl")
include("groupeddataframe/complextransforms.jl")

include("abstractdataframe/prettytables.jl")
include("abstractdataframe/show.jl")
include("groupeddataframe/show.jl")
include("dataframerow/show.jl")
Expand Down
11 changes: 2 additions & 9 deletions src/abstractdataframe/io.jl
Expand Up @@ -47,15 +47,8 @@ Base.show(io::IO, mime::MIME"text/csv", df::AbstractDataFrame) =
printtable(io, df, header = true, separator = ',')
Base.show(io::IO, mime::MIME"text/tab-separated-values", df::AbstractDataFrame) =
printtable(io, df, header = true, separator = '\t')
Base.show(io::IO, mime::MIME"text/plain", df::AbstractDataFrame;
allrows::Bool = !get(io, :limit, false),
allcols::Bool = !get(io, :limit, false),
splitcols = get(io, :limit, false),
rowlabel::Symbol = :Row,
summary::Bool = true,
eltypes::Bool = true) =
show(io, df, allrows=allrows, allcols=allcols,
splitcols=splitcols, rowlabel=rowlabel, summary=summary, eltypes=eltypes)
Base.show(io::IO, mime::MIME"text/plain", df::AbstractDataFrame; kwargs...) =
show(io, df; kwargs...)

##############################################################################
#
Expand Down
54 changes: 28 additions & 26 deletions src/abstractdataframe/iteration.jl
Expand Up @@ -249,74 +249,76 @@ Base.names(itr::Union{DataFrameRows, DataFrameColumns}, cols) = names(parent(itr
function Base.show(io::IO, dfrs::DataFrameRows;
allrows::Bool = !get(io, :limit, false),
allcols::Bool = !get(io, :limit, false),
splitcols = get(io, :limit, false),
rowlabel::Symbol = :Row,
summary::Bool = true,
eltypes::Bool = true,
truncate::Int = 32)
truncate::Int = 32,
kwargs...)
df = parent(dfrs)
summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameRows")
_show(io, df, allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=false, eltypes=eltypes, truncstring=truncate)
title = summary ? "$(nrow(df))×$(ncol(df)) DataFrameRows" : ""
_show(io, df; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
summary=false, eltypes=eltypes, truncate=truncate, title=title,
kwargs...)
end

Base.show(io::IO, mime::MIME"text/plain", dfrs::DataFrameRows;
allrows::Bool = !get(io, :limit, false),
allcols::Bool = !get(io, :limit, false),
splitcols = get(io, :limit, false),
rowlabel::Symbol = :Row,
summary::Bool = true,
eltypes::Bool = true,
truncate::Int = 32) =
show(io, dfrs, allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=summary, eltypes=eltypes, truncate=truncate)
truncate::Int = 32,
kwargs...) =
show(io, dfrs; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)

Base.show(dfrs::DataFrameRows;
allrows::Bool = !get(stdout, :limit, true),
allcols::Bool = !get(stdout, :limit, true),
splitcols = get(stdout, :limit, true),
rowlabel::Symbol = :Row,
summary::Bool = true,
eltypes::Bool = true,
truncate::Int = 32) =
show(stdout, dfrs, allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=summary, eltypes=eltypes, truncate=truncate)
truncate::Int = 32,
kwargs...) =
show(stdout, dfrs; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)

function Base.show(io::IO, dfcs::DataFrameColumns;
allrows::Bool = !get(io, :limit, false),
allcols::Bool = !get(io, :limit, false),
splitcols = get(io, :limit, false),
rowlabel::Symbol = :Row,
summary::Bool = true,
eltypes::Bool = true,
truncate::Int = 32)
truncate::Int = 32,
kwargs...)
df = parent(dfcs)
summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameColumns")
_show(io, parent(dfcs), allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=false, eltypes=eltypes, truncstring=truncate)
title = summary ? "$(nrow(df))×$(ncol(df)) DataFrameColumns" : ""
_show(io, parent(dfcs); allrows=allrows, allcols=allcols, rowlabel=rowlabel,
summary=false, eltypes=eltypes, truncate=truncate, title=title,
kwargs...)
end

Base.show(io::IO, mime::MIME"text/plain", dfcs::DataFrameColumns;
allrows::Bool = !get(io, :limit, false),
allcols::Bool = !get(io, :limit, false),
splitcols = get(io, :limit, false),
rowlabel::Symbol = :Row,
summary::Bool = true,
eltypes::Bool = true,
truncate::Int = 32) =
show(io, dfcs, allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=summary, eltypes=eltypes, truncate=truncate)
truncate::Int = 32,
kwargs...) =
show(io, dfcs; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)

Base.show(dfcs::DataFrameColumns;
allrows::Bool = !get(stdout, :limit, true),
allcols::Bool = !get(stdout, :limit, true),
splitcols = get(stdout, :limit, true),
rowlabel::Symbol = :Row,
summary::Bool = true,
eltypes::Bool = true,
truncate::Int = 32) =
show(stdout, dfcs, allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=summary, eltypes=eltypes, truncate=truncate)
truncate::Int = 32,
kwargs...) =
show(stdout, dfcs; allrows=allrows, allcols=allcols, rowlabel=rowlabel,
summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)

"""
mapcols(f::Union{Function,Type}, df::AbstractDataFrame)
Expand Down
92 changes: 92 additions & 0 deletions src/abstractdataframe/prettytables.jl
@@ -0,0 +1,92 @@
##############################################################################
##
## Functions related to the interface with PrettyTables.jl.
##
##############################################################################

# Default DataFrames highlighter for text backend.
#
# This highlighter changes the text color to gray in cells with `nothing`,
# `missing`, `#undef`, and types related to DataFrames.jl.
# Predicate used by the default text-backend highlighter.
#
# - `data` is the table being rendered; it is only accessed as `data[i, j]`.
# - `i` and `j` are the row and column of the cell under inspection.
#
# Returns `true` when the cell holds `missing`, `nothing`, one of the
# DataFrames.jl container types listed below, or when reading the cell raises
# `UndefRefError` (an `#undef` entry). Any other exception is propagated.
function _pretty_tables_highlighter_func(data, i::Integer, j::Integer)
    try
        value = data[i, j]
        ismissing(value) && return true
        value === nothing && return true
        return value isa Union{AbstractDataFrame, GroupedDataFrame,
                               DataFrameRow, DataFrameRows,
                               DataFrameColumns}
    catch err
        # Only an unassigned reference is treated as "highlight this cell";
        # everything else is a genuine error and must reach the caller.
        err isa UndefRefError || rethrow()
        return true
    end
end

# Module-level highlighter object: pairs the predicate
# `_pretty_tables_highlighter_func` with a crayon whose foreground color is
# `:dark_gray`, so that cells for which the predicate returns `true` are
# rendered with that crayon.
const _PRETTY_TABLES_HIGHLIGHTER = Highlighter(_pretty_tables_highlighter_func,
Crayon(foreground = :dark_gray))

# Default DataFrames formatter for text backend.
#
# This formatter changes how the following types are presented when rendering
# the data frame:
# - missing;
# - nothing;
# - Cells with types related to DataFrames.jl.

# Cell formatter applied to every value `v` of the table; `i` and `j` are the
# cell's row and column and are not used by this formatter.
#
# Returns:
# - the one-line `summary` string for values of DataFrames.jl container types;
# - the string "missing" for `missing`;
# - an empty string for `nothing`;
# - `v` itself, unchanged, for anything else.
function _pretty_tables_general_formatter(v, i::Integer, j::Integer)
    # `print` and `show` must not be used for nested DataFrames.jl objects:
    # they would call `_pretty_table` to render the current table, leading to
    # a stack overflow. `summary` is safe.
    v isa Union{AbstractDataFrame, GroupedDataFrame, DataFrameRow,
                DataFrameRows, DataFrameColumns} && return sprint(summary, v)

    ismissing(v) && return "missing"
    v === nothing && return ""

    return v
end

# Formatter to align the floating points as in Julia array printing.
#
# - `float_cols` contains the IDs of the columns that must be formatted.
# - `indices` is a vector of vectors containing the indices of each element
# in the data frame.
# - `padding` is a vector of vectors containing the padding of each element for
# each row.
# - `compact_printing` must be a boolean indicating if we should enable the
# `:compact` option of `io` when converting the number to string.

# Cell formatter that left-pads selected floating point cells with spaces.
#
# - `v` is the cell value, `i` its row and `j` its column.
# - `float_cols` lists the columns this formatter acts on.
# - `indices[k]` lists the rows of column `float_cols[k]` that are formatted.
# - `padding[k][r]` is the number of spaces prepended to the cell at row
#   `indices[k][r]` of column `float_cols[k]`.
# - `compact_printing` is forwarded as the `:compact` IO context property when
#   the value is converted to a string.
#
# The formatter is invoked for every cell of the table, so any cell that is
# not listed in `float_cols` / `indices` is returned unchanged.
function _pretty_tables_float_formatter(v, i::Integer, j::Integer,
                                        float_cols::Vector{Int},
                                        indices::Vector{Vector{Int}},
                                        padding::Vector{Vector{Int}},
                                        compact_printing::Bool)
    # Position of column `j` among the float columns (`nothing` when `j` is
    # not one of them, which also covers an empty `float_cols`).
    col_pos = findfirst(==(j), float_cols)
    col_pos === nothing && return v

    # Position of row `i` among the rows to be formatted in that column.
    row_pos = findfirst(==(i), indices[col_pos])
    row_pos === nothing && return v

    nspaces = padding[col_pos][row_pos]
    text = sprint(print, v; context = :compact => compact_printing)
    return repeat(" ", nspaces) * text
end

0 comments on commit 681de52

Please sign in to comment.