Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved show for DataFrames #995

Merged
merged 6 commits into from Sep 19, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 12 additions & 14 deletions src/abstractdataframe/show.jl
Expand Up @@ -99,6 +99,10 @@ function getmaxwidths(df::AbstractDataFrame,
rowindices1::AbstractVector{Int},
rowindices2::AbstractVector{Int},
rowlabel::Symbol) # -> Vector{Int}
# TODO: correct calculation of width for the cases:
# 1) DataFrame(a=["∀ε⫺0: x+ε⫺x"])
# 2) DataFrame(a=[[1:30;]])
# 3) decide how '\r', '\n', '\t' characters should be handled in strings
maxwidths = Vector{Int}(size(df, 2) + 1)

undefstrwidth = ourstrwidth(Base.undef_ref_str)
Expand Down Expand Up @@ -298,7 +302,7 @@ end
#' @param splitchunks::Bool Should the printing of the AbstractDataFrame
#' be done in chunks? Defaults to `false`.
#' @param allcols::Bool Should only one chunk be printed if printing in
#' chunks? Defaults to `false`.
#' chunks? Defaults to `true`.
#' @param rowlabel::Symbol What label should be printed when rendering the
#' numeric IDs of each row? Defaults to `"Row"`.
#' @param displaysummary::Bool Should a brief string summary of the
Expand Down Expand Up @@ -331,10 +335,7 @@ function showrows(io::IO,

rowmaxwidth = maxwidths[ncols + 1]
chunkbounds = getchunkbounds(maxwidths, splitchunks, displaysize(io)[2])
nchunks = length(chunkbounds) - 1
if !allcols
nchunks = min(nchunks, 1)
end
nchunks = allcols ? length(chunkbounds) - 1 : min(length(chunkbounds) - 1, 1)

header = displaysummary ? summary(df) : ""
if !allcols && length(chunkbounds) > 2
Expand Down Expand Up @@ -581,13 +582,11 @@ function showcols(io::IO, df::AbstractDataFrame, all::Bool = false,
Missing = colmissing(df))
nrows, ncols = size(df)
if values && nrows > 0
# type of Values column is now String; it might need to be changed
# if the way strings are printed in data frames changes
if nrows == 1
metadata[:Values] = [sprint(showcompact, df[1, i]) for i in 1:ncols]
metadata[:Values] = [sprint(ourshowcompact, df[1, i]) for i in 1:ncols]
else
metadata[:Values] = [sprint(showcompact, df[1, i]) * " … " *
sprint(showcompact, df[end, i]) for i in 1:ncols]
metadata[:Values] = [sprint(ourshowcompact, df[1, i]) * " … " *
sprint(ourshowcompact, df[end, i]) for i in 1:ncols]
end
end
(all?showall:show)(io, metadata, true, Symbol("Col #"), false)
Expand All @@ -601,7 +600,7 @@ end
#' count.
#'
#' @param df::AbstractDataFrame An AbstractDataFrame.
#' @param allcols::Bool If `false` (default), only a subset of columns
#' @param all::Bool If `false` (default), only a subset of columns
#' fitting on the screen is printed.
#' @param values::Bool If `true` (default), first and last value of
#' each column is printed.
Expand All @@ -612,7 +611,6 @@ end
#'
#' df = DataFrame(A = 1:3, B = ["x", "y", "z"])
#' showcols(df)
function showcols(df::AbstractDataFrame, allcols::Bool=false, values::Bool=true)
showcols(STDOUT, df, allcols, values) # -> Void
function showcols(df::AbstractDataFrame, all::Bool=false, values::Bool=true)
showcols(STDOUT, df, all, values) # -> Void
end

179 changes: 171 additions & 8 deletions test/show.jl
Expand Up @@ -4,8 +4,6 @@ module TestShow
# In the future the newline character \n should be added to this test case
df = DataFrame(A = 1:4, B = ["x\"", "∀ε⫺0: x+ε⫺x", "z\$", "ABC"],
C = Float32[1.0, 2.0, 3.0, 4.0])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a null value somewhere so that this is covered (unless it's done elsewhere already)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is already covered I believe in line:

df = DataFrame(Fish = ["Suzy", "Amir"], Mass = [1.5, null])

at the end of the file

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but showcols isn't tested there. Would be worth adding a test.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added showcols test

srand(1)
df_big = DataFrame(rand(50,50))

refstr = """
4×3 DataFrames.DataFrame
Expand Down Expand Up @@ -41,27 +39,181 @@ module TestShow
refstr = """
4×3 DataFrames.DataFrame

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a detail, but we probably don't need an empty line? That would be more consistent with the other format.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed

│ Col # │ Name │ Eltype │ Missing │ Values
├───────┼──────┼─────────┼─────────┼─────────────────
│ 1 │ A │ Int64 │ 0 │ 1 … 4
│ 2 │ B │ String │ 0 │ \"x\\\"\"\"ABC\"
│ 3 │ C │ Float32 │ 0 │ 1.0 … 4.0 │"""
│ Col # │ Name │ Eltype │ Missing │ Values │
├───────┼──────┼─────────┼─────────┼─────────────┤
│ 1 │ A │ Int64 │ 0 │ 1 … 4 │
│ 2 │ B │ String │ 0 │ x\" … ABC
│ 3 │ C │ Float32 │ 0 │ 1.0 … 4.0 │"""
for a in [true, false]
io = IOBuffer()
showcols(io, df, a, true)
str = String(take!(io))
@test str == refstr
end

io = IOBuffer()
srand(1)
df_big = DataFrame(rand(25,5))

io = IOContext(IOBuffer(), :displaysize=>(10,40))
show(io, df_big)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also test the output of these functions (even if that's verbose, it's fine)? Else the case when there are too many rows won't be covered. You should probably pass a custom IOContext to control the size of the display. Also better define df_big here rather than above, where it isn't used.

str = String(take!(io.io))
@test str == """
25×5 DataFrames.DataFrame. Omitted printing of 2 columns
│ Row │ x1 │ x2 │ x3 │
├─────┼──────────┼──────────┼──────────┤
│ 1 │ 0.236033 │ 0.644883 │ 0.440897 │
│ 24 │ 0.278582 │ 0.241591 │ 0.990741 │
│ 25 │ 0.751313 │ 0.884837 │ 0.550334 │"""

io = IOContext(IOBuffer(), :displaysize=>(10,40))
show(io, df_big, true)
str = String(take!(io.io))
@test str == """
25×5 DataFrames.DataFrame
│ Row │ x1 │ x2 │ x3 │
├─────┼──────────┼──────────┼──────────┤
│ 1 │ 0.236033 │ 0.644883 │ 0.440897 │
│ 24 │ 0.278582 │ 0.241591 │ 0.990741 │
│ 25 │ 0.751313 │ 0.884837 │ 0.550334 │

│ Row │ x4 │ x5 │
├─────┼──────────┼──────────┤
│ 1 │ 0.580782 │ 0.138763 │
│ 24 │ 0.762276 │ 0.755415 │
│ 25 │ 0.339081 │ 0.649056 │"""

io = IOContext(IOBuffer(), :displaysize=>(10,40))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe set the number of rows to a lower value in order to have a smaller test and check what happens when not all rows can be shown in a single page? Can also be done in a later PR if you prefer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is what I check here. I assume 10 rows and 40 columns. And you can see the difference between show and showall.
show limits the output to fit page height and showall does not do that.
They also differ in how they handle wide data (not fitting the screen horizontally) when we set allcols to true: show does paging and showall prints the full table ignoring :displaysize (which could be useful when, e.g., we want to dump the DataFrame show result to a file).

showall(io, df_big)
str = String(take!(io.io))
@test str == """
25×5 DataFrames.DataFrame
│ Row │ x1 │ x2 │ x3 │ x4 │ x5 │
├─────┼────────────┼───────────┼───────────┼───────────┼───────────┤
│ 1 │ 0.236033 │ 0.644883 │ 0.440897 │ 0.580782 │ 0.138763 │
│ 2 │ 0.346517 │ 0.0778264 │ 0.404673 │ 0.768359 │ 0.456446 │
│ 3 │ 0.312707 │ 0.848185 │ 0.736787 │ 0.519525 │ 0.739918 │
│ 4 │ 0.00790928 │ 0.0856352 │ 0.953803 │ 0.514863 │ 0.816004 │
│ 5 │ 0.488613 │ 0.553206 │ 0.0951856 │ 0.998136 │ 0.114529 │
│ 6 │ 0.210968 │ 0.46335 │ 0.519675 │ 0.603682 │ 0.748928 │
│ 7 │ 0.951916 │ 0.185821 │ 0.0135403 │ 0.758775 │ 0.878108 │
│ 8 │ 0.999905 │ 0.111981 │ 0.303399 │ 0.590953 │ 0.930481 │
│ 9 │ 0.251662 │ 0.976312 │ 0.702557 │ 0.722086 │ 0.896291 │
│ 10 │ 0.986666 │ 0.0516146 │ 0.596537 │ 0.953207 │ 0.663145 │
│ 11 │ 0.555751 │ 0.53803 │ 0.638935 │ 0.384411 │ 0.472799 │
│ 12 │ 0.437108 │ 0.455692 │ 0.872347 │ 0.320011 │ 0.880525 │
│ 13 │ 0.424718 │ 0.279395 │ 0.548635 │ 0.865625 │ 0.0141033 │
│ 14 │ 0.773223 │ 0.178246 │ 0.262992 │ 0.45457 │ 0.502774 │
│ 15 │ 0.28119 │ 0.548983 │ 0.526443 │ 0.420287 │ 0.224851 │
│ 16 │ 0.209472 │ 0.370971 │ 0.465019 │ 0.225151 │ 0.287858 │
│ 17 │ 0.251379 │ 0.894166 │ 0.275519 │ 0.286169 │ 0.104033 │
│ 18 │ 0.0203749 │ 0.648054 │ 0.461823 │ 0.309144 │ 0.475749 │
│ 19 │ 0.287702 │ 0.417039 │ 0.951861 │ 0.170391 │ 0.416681 │
│ 20 │ 0.859512 │ 0.144566 │ 0.288737 │ 0.147162 │ 0.521387 │
│ 21 │ 0.0769509 │ 0.622403 │ 0.661232 │ 0.230063 │ 0.908499 │
│ 22 │ 0.640396 │ 0.872334 │ 0.194568 │ 0.0929292 │ 0.102832 │
│ 23 │ 0.873544 │ 0.524975 │ 0.393193 │ 0.681415 │ 0.670421 │
│ 24 │ 0.278582 │ 0.241591 │ 0.990741 │ 0.762276 │ 0.755415 │
│ 25 │ 0.751313 │ 0.884837 │ 0.550334 │ 0.339081 │ 0.649056 │"""

io = IOContext(IOBuffer(), :displaysize=>(10,40))
showall(io, df_big, false)
str = String(take!(io.io))
@test str == """
25×5 DataFrames.DataFrame. Omitted printing of 3 columns
│ Row │ x1 │ x2 │
├─────┼────────────┼───────────┤
│ 1 │ 0.236033 │ 0.644883 │
│ 2 │ 0.346517 │ 0.0778264 │
│ 3 │ 0.312707 │ 0.848185 │
│ 4 │ 0.00790928 │ 0.0856352 │
│ 5 │ 0.488613 │ 0.553206 │
│ 6 │ 0.210968 │ 0.46335 │
│ 7 │ 0.951916 │ 0.185821 │
│ 8 │ 0.999905 │ 0.111981 │
│ 9 │ 0.251662 │ 0.976312 │
│ 10 │ 0.986666 │ 0.0516146 │
│ 11 │ 0.555751 │ 0.53803 │
│ 12 │ 0.437108 │ 0.455692 │
│ 13 │ 0.424718 │ 0.279395 │
│ 14 │ 0.773223 │ 0.178246 │
│ 15 │ 0.28119 │ 0.548983 │
│ 16 │ 0.209472 │ 0.370971 │
│ 17 │ 0.251379 │ 0.894166 │
│ 18 │ 0.0203749 │ 0.648054 │
│ 19 │ 0.287702 │ 0.417039 │
│ 20 │ 0.859512 │ 0.144566 │
│ 21 │ 0.0769509 │ 0.622403 │
│ 22 │ 0.640396 │ 0.872334 │
│ 23 │ 0.873544 │ 0.524975 │
│ 24 │ 0.278582 │ 0.241591 │
│ 25 │ 0.751313 │ 0.884837 │"""

io = IOContext(IOBuffer(), :displaysize=>(10,40))
showcols(io, df_big, false, false)
str = String(take!(io.io))
@test str == """
25×5 DataFrames.DataFrame

│ Col # │ Name │ Eltype │ Missing │
├───────┼──────┼─────────┼─────────┤
│ 1 │ x1 │ Float64 │ 0 │
│ 2 │ x2 │ Float64 │ 0 │
│ 3 │ x3 │ Float64 │ 0 │
│ 4 │ x4 │ Float64 │ 0 │
│ 5 │ x5 │ Float64 │ 0 │"""

io = IOContext(IOBuffer(), :displaysize=>(10,40))
showcols(io, df_big, true, false)
str = String(take!(io.io))
@test str == """
25×5 DataFrames.DataFrame

│ Col # │ Name │ Eltype │ Missing │
├───────┼──────┼─────────┼─────────┤
│ 1 │ x1 │ Float64 │ 0 │
│ 2 │ x2 │ Float64 │ 0 │
│ 3 │ x3 │ Float64 │ 0 │
│ 4 │ x4 │ Float64 │ 0 │
│ 5 │ x5 │ Float64 │ 0 │"""

io = IOContext(IOBuffer(), :displaysize=>(10,40))
showcols(io, df_big, false, true)
str = String(take!(io.io))
@test str == """
25×5 DataFrames.DataFrame

│ Col # │ Name │ Eltype │ Missing │
├───────┼──────┼─────────┼─────────┤
│ 1 │ x1 │ Float64 │ 0 │
│ 2 │ x2 │ Float64 │ 0 │
│ 3 │ x3 │ Float64 │ 0 │
│ 4 │ x4 │ Float64 │ 0 │
│ 5 │ x5 │ Float64 │ 0 │

│ Col # │ Values │
├───────┼───────────────────────┤
│ 1 │ 0.236033 … 0.751313 │
│ 2 │ 0.644883 … 0.884837 │
│ 3 │ 0.440897 … 0.550334 │
│ 4 │ 0.580782 … 0.339081 │
│ 5 │ 0.138763 … 0.649056 │"""

io = IOContext(IOBuffer(), :displaysize=>(10,40))
showcols(io, df_big, true, true)
str = String(take!(io.io))
@test str == """
25×5 DataFrames.DataFrame

│ Col # │ Name │ Eltype │ Missing │ Values │
├───────┼──────┼─────────┼─────────┼───────────────────────┤
│ 1 │ x1 │ Float64 │ 0 │ 0.236033 … 0.751313 │
│ 2 │ x2 │ Float64 │ 0 │ 0.644883 … 0.884837 │
│ 3 │ x3 │ Float64 │ 0 │ 0.440897 … 0.550334 │
│ 4 │ x4 │ Float64 │ 0 │ 0.580782 … 0.339081 │
│ 5 │ x5 │ Float64 │ 0 │ 0.138763 … 0.649056 │"""

io = IOBuffer()
df_small = DataFrame([1.0:5.0;])
Expand Down Expand Up @@ -131,6 +283,17 @@ module TestShow
│ 1 │ Suzy │ 1.5 │
│ 2 │ Amir │ null │"""

io = IOBuffer()
showcols(io, df)
str = String(take!(io))
@test str == """
2×2 DataFrames.DataFrame

│ Col # │ Name │ Eltype │ Missing │ Values │
├───────┼──────┼────────────────────────────┼─────────┼───────────────┤
│ 1 │ Fish │ String │ 0 │ Suzy … Amir │
│ 2 │ Mass │ Union{Float64, Nulls.Null} │ 1 │ 1.5 … null │"""

# Test computing width for Array{String} columns
df = DataFrame(Any[["a"]], [:x])
io = IOBuffer()
Expand Down