diff --git a/NEWS.md b/NEWS.md index 38d9100..7deff38 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # TidierData.jl updates +## v0.16.0 - 2024-06-07 +- `unique()`, `mad()`, and `iqr()` are no longer auto-vectorized +- Bugfix: `@ungroup()` now preserves row-ordering (and is faster) +- Bugfix: `slice_sample()` now throws an error if no `n` or `prop` keyword argument is provided +- Bump minimum Julia version to 1.9 + ## v0.15.2 - 2024-04-19 - Update Chain.jl dependency version diff --git a/Project.toml b/Project.toml index 269fcdf..9af569c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TidierData" uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80" authors = ["Karandeep Singh"] -version = "0.15.2" +version = "0.16.0" [deps] Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" @@ -22,7 +22,7 @@ Reexport = "0.2, 1" ShiftedArrays = "2" Statistics = "1.6" StatsBase = "0.34, 1" -julia = "1.6" +julia = "1.9" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/README.md b/README.md index 217879e..dd7d71a 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/TidierOrg/TidierData.jl/blob/main/LICENSE) [![Docs: Latest](https://img.shields.io/badge/Docs-Latest-blue.svg)](https://tidierorg.github.io/TidierData.jl/latest) [![Build Status](https://github.com/TidierOrg/TidierData.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/TidierOrg/TidierData.jl/actions/workflows/CI.yml?query=branch%3Amain) - +[![Downloads](https://img.shields.io/badge/dynamic/json?url=http%3A%2F%2Fjuliapkgstats.com%2Fapi%2Fv1%2Fmonthly_downloads%2FTidierData&query=total_requests&suffix=%2Fmonth&label=Downloads)](http://juliapkgstats.com/pkg/TidierData) @@ -140,4 +140,4 @@ See [NEWS.md](https://github.com/TidierOrg/TidierData.jl/blob/main/NEWS.md) for ## What's missing -Is there a tidyverse feature missing that you would like to see in TidierData.jl? 
Please file a GitHub issue. Because TidierData.jl primarily wraps DataFrames.jl, our decision to integrate a new feature will be guided by how well-supported it is within DataFrames.jl and how likely other users are to benefit from it. \ No newline at end of file +Is there a tidyverse feature missing that you would like to see in TidierData.jl? Please file a GitHub issue. Because TidierData.jl primarily wraps DataFrames.jl, our decision to integrate a new feature will be guided by how well-supported it is within DataFrames.jl and how likely other users are to benefit from it. diff --git a/docs/examples/UserGuide/conditionals.jl b/docs/examples/UserGuide/conditionals.jl index 604a7fd..b1d6059 100644 --- a/docs/examples/UserGuide/conditionals.jl +++ b/docs/examples/UserGuide/conditionals.jl @@ -34,7 +34,7 @@ end # Although `if_else()` is convenient when evaluating a single condition, it can be cumbersome when evaluating multiple conditions because subsequent conditions need to be nested within the `no` condition for the preceding argument. For situations where multiple conditions need to be evaluated, `case_when()` is more convenient. -# Let's first consider a similar example from above and recreate it using `case_when()`. The following code creates a column `b` that assigns a value if 3 if `a >= 3` and otherwise leaves the value unchanged. +# Let's first consider a similar example from above and recreate it using `case_when()`. The following code creates a column `b` that assigns a value of 3 if `a >= 3` and otherwise leaves the value unchanged. @chain df begin @mutate(b = case_when(a >= 3 => 3, @@ -72,4 +72,4 @@ end # ## Do these functions work outside of TidierData.jl? -# Yes, both `if_else()` and `case_when()` work outside of TidierData.jl. However, you'll need to remember that if working with vectors, both the functions and conditions will need to be vectorized, and in the case of `case_when()`, the `=>` will need to be written as `.=>`. 
The reason this is not needed when using these functions inside of TidierData.jl is because they are auto-vectorized. \ No newline at end of file +# Yes, both `if_else()` and `case_when()` work outside of TidierData.jl. However, you'll need to remember that if working with vectors, both the functions and conditions will need to be vectorized, and in the case of `case_when()`, the `=>` will need to be written as `.=>`. The reason this is not needed when using these functions inside of TidierData.jl is because they are auto-vectorized. diff --git a/docs/examples/UserGuide/slice.jl b/docs/examples/UserGuide/slice.jl index a374198..db348fe 100644 --- a/docs/examples/UserGuide/slice.jl +++ b/docs/examples/UserGuide/slice.jl @@ -64,7 +64,7 @@ end # ## Sample 5 random rows in the data frame @chain df begin - @slice_sample(5) + @slice_sample(n = 5) end # ## Slice the min @@ -99,4 +99,4 @@ end @chain df begin @slice_head(n = 3) -end \ No newline at end of file +end diff --git a/src/TidierData.jl b/src/TidierData.jl index 9b88c1d..d95da69 100644 --- a/src/TidierData.jl +++ b/src/TidierData.jl @@ -28,7 +28,7 @@ const code = Ref{Bool}(false) # output DataFrames.jl code? const log = Ref{Bool}(false) # output tidylog output? 
(not yet implemented) # The global do-not-vectorize "list" -const not_vectorized = Ref{Vector{Symbol}}([:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :∘, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical]) +const not_vectorized = Ref{Vector{Symbol}}([:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :∘, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :mad, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical, :unique, :iqr]) # The global do-not-escape "list" # `in`, `∈`, and `∉` should be vectorized in auto-vec but not escaped @@ -494,7 +494,20 @@ end $docstring_ungroup """ macro ungroup(df) - :(DataFrame($(esc(df)))) + df_expr = quote + # Bind the argument once so that an arbitrary expression passed as `df` is not evaluated twice + local df_input = $(esc(df)) + if df_input isa GroupedDataFrame + transform(df_input; ungroup = true) + else + copy(df_input) + end + end + # Show the generated DataFrames.jl code when the global `code` flag is set + if code[] + @info MacroTools.prettify(df_expr) + end + return df_expr end """ @@ -542,7 +555,7 @@ macro distinct(df, exprs...) # because if the original DataFrame is grouped, it must be ungrouped # and then regrouped, so there's no need to make a copy up front. # This is because `unique()` does not work on GroupDataFrames.
- local df_copy = DataFrame($(esc(df))) + local df_copy = transform($(esc(df)); ungroup = true) if $any_found_n transform!(df_copy, nrow => :TidierData_n) end diff --git a/src/docstrings.jl b/src/docstrings.jl index a53788a..f6d76fd 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -1320,7 +1320,7 @@ julia> @semi_join(df1, df2, "a" = "a") const docstring_pivot_wider = """ - @pivot_wider(df, names_from, values_from) + @pivot_wider(df, names_from, values_from[, values_fill]) Reshapes the DataFrame to make it wider, increasing the number of columns and reducing the number of rows. @@ -1328,6 +1328,7 @@ Reshapes the DataFrame to make it wider, increasing the number of columns and re - `df`: A DataFrame. - `names_from`: The name of the column to get the name of the output columns from. - `values_from`: The name of the column to get the cell values from. +- `values_fill`: The value to replace a missing name/value combination (default is `missing`). # Examples ```jldoctest @@ -3409,4 +3410,4 @@ julia> @relocate(df, B:C) # bring columns to the front 4 │ 9 D 4 B 4 D 5 │ 10 E 5 C 5 E ``` -""" \ No newline at end of file +""" diff --git a/src/slice.jl b/src/slice.jl index bb49831..a6ada20 100644 --- a/src/slice.jl +++ b/src/slice.jl @@ -64,7 +64,7 @@ macro slice_sample(df, exprs...) as_integer(floor(n() * $expr_dict[:prop])); replace=$replace)) else - @slice($(esc(df)), sample(1:n(), 1; replace=$replace)) + throw(ArgumentError("Please provide either an `n` or a `prop` value as a keyword argument.")) end end