From 05e539a3ac8bb3d7c38b3cfdd116503780e592c8 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Fri, 5 May 2023 02:17:09 -0400 Subject: [PATCH] Bug fix to allow multiple columns to `@distinct()`. Bumped version to 0.7.6. --- NEWS.md | 3 +++ Project.toml | 2 +- src/Tidier.jl | 16 ++++++++++++++-- src/docstrings.jl | 47 +++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 57 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 83e1ac4..1ad8207 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # Tidier.jl updates +## v0.7.6 - 2023-05-04 +- Fixed bug to allow multiple columns in `@distinct()` separated by commas or using selection helpers. + ## v0.7.5 - 2023-04-30 - Fixed bug to ensure that `&&` and `||` are auto-vectorized - Added docstrings and examples to show different ways of filtering by multiple "and" conditions, including `&&`, `&`, and separating multiple expressions with commas. diff --git a/Project.toml b/Project.toml index e98d84e..8aa7a65 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Tidier" uuid = "f0413319-3358-4bb0-8e7c-0c83523a93bd" authors = ["Karandeep Singh"] -version = "0.7.5" +version = "0.7.6" [deps] Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" diff --git a/src/Tidier.jl b/src/Tidier.jl index a762731..83b8f57 100644 --- a/src/Tidier.jl +++ b/src/Tidier.jl @@ -567,7 +567,13 @@ macro distinct(df, exprs...) _ end end - unique($(tidy_exprs...)) + @chain _ begin + if length([$tidy_exprs...]) == 0 + unique(_) + else + unique(_, Cols($(tidy_exprs...))) + end + end select(Cols(Not(r"^(Tidier_n|Tidier_row_number)$"))) groupby(col_names; sort = true) # regroup end @@ -587,7 +593,13 @@ macro distinct(df, exprs...) _ end end - unique($(tidy_exprs...)) + @chain _ begin + if length([$tidy_exprs...]) == 0 + unique(_) + else + unique(_, Cols($(tidy_exprs...))) + end + end select(Cols(Not(r"^(Tidier_n|Tidier_row_number)$"))) end end diff --git a/src/docstrings.jl b/src/docstrings.jl index 477541d..d167dfb 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -712,7 +712,7 @@ If no columns or expressions are provided, then unique rows across all columns a # Examples ```jldoctest -julia> df = DataFrame(a = repeat('a':'e', inner = 2), b = 1:10, c = 11:20); +julia> df = DataFrame(a = repeat('a':'e', inner = 2), b = repeat(1:5, 2), c = 11:20); julia> @chain df begin @distinct() @@ -726,11 +726,11 @@ julia> @chain df begin 3 │ b 3 13 4 │ b 4 14 5 │ c 5 15 - 6 │ c 6 16 - 7 │ d 7 17 - 8 │ d 8 18 - 9 │ e 9 19 - 10 │ e 10 20 + 6 │ c 1 16 + 7 │ d 2 17 + 8 │ d 3 18 + 9 │ e 4 19 + 10 │ e 5 20 julia> @chain df begin @distinct(a) @@ -742,8 +742,39 @@ julia> @chain df begin 1 │ a 1 11 2 │ b 3 13 3 │ c 5 15 - 4 │ d 7 17 - 5 │ e 9 19 + 4 │ d 2 17 + 5 │ e 4 19 + +julia> @chain df begin + @distinct(starts_with("a")) + end +5×3 DataFrame + Row │ a b c + │ Char Int64 Int64 +─────┼──────────────────── + 1 │ a 1 11 + 2 │ b 3 13 + 3 │ c 5 15 + 4 │ d 2 17 + 5 │ e 4 19 + +julia> @chain df begin + @distinct(a, b) + end +10×3 DataFrame + Row │ a b c + │ Char Int64 Int64 +─────┼──────────────────── + 1 │ a 1 11 + 2 │ a 2 12 + 3 │ b 3 13 + 4 │ b 4 14 + 5 │ c 5 15 + 6 │ c 1 16 + 7 │ d 2 17 + 8 │ d 3 18 + 9 │ e 4 19 + 10 │ e 5 20 ``` """