TidierOrg · kdpsingh · Jun 8, 2024 · Apr 15, 2024 · Apr 15, 2024 · Apr 15, 2024
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,11 @@
 # TidierData.jl updates
 
+## v0.16.0 - 2024-06-07
+- `unique()`, `mad()`, and `iqr()` are no longer auto-vectorized
+- Bugfix: `@ungroup()` now preserves row-ordering (and is faster)
+- Bugfix: `@slice_sample()` now throws an error if no `n` or `prop` keyword argument is provided
+- Bump minimum Julia version to 1.9
+
 ## v0.15.2 - 2024-04-19
 - Update Chain.jl dependency version
 

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "TidierData"
 uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
 authors = ["Karandeep Singh"]
-version = "0.15.2"
+version = "0.16.0"
 
 [deps]
 Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
@@ -22,7 +22,7 @@ Reexport = "0.2, 1"
 ShiftedArrays = "2"
 Statistics = "1.6"
 StatsBase = "0.34, 1"
-julia = "1.6"
+julia = "1.9"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/TidierOrg/TidierData.jl/blob/main/LICENSE)
 [![Docs: Latest](https://img.shields.io/badge/Docs-Latest-blue.svg)](https://tidierorg.github.io/TidierData.jl/latest)
 [![Build Status](https://github.com/TidierOrg/TidierData.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/TidierOrg/TidierData.jl/actions/workflows/CI.yml?query=branch%3Amain)
-<!-- [![Downloads](https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/TidierData&label=Downloads)](https://pkgs.genieframework.com?packages=TidierData) -->
+[![Downloads](https://img.shields.io/badge/dynamic/json?url=http%3A%2F%2Fjuliapkgstats.com%2Fapi%2Fv1%2Fmonthly_downloads%2FTidierData&query=total_requests&suffix=%2Fmonth&label=Downloads)](http://juliapkgstats.com/pkg/TidierData)
 
 <img src="/docs/src/assets/Tidier_jl_logo.png" align="right" style="padding-left:10px;" width="150"/>
 
@@ -140,4 +140,4 @@ See [NEWS.md](https://github.com/TidierOrg/TidierData.jl/blob/main/NEWS.md) for
 
 ## What's missing
 
-Is there a tidyverse feature missing that you would like to see in TidierData.jl? Please file a GitHub issue. Because TidierData.jl primarily wraps DataFrames.jl, our decision to integrate a new feature will be guided by how well-supported it is within DataFrames.jl and how likely other users are to benefit from it.
+Is there a tidyverse feature missing that you would like to see in TidierData.jl? Please file a GitHub issue. Because TidierData.jl primarily wraps DataFrames.jl, our decision to integrate a new feature will be guided by how well-supported it is within DataFrames.jl and how likely other users are to benefit from it.
diff --git a/docs/examples/UserGuide/conditionals.jl b/docs/examples/UserGuide/conditionals.jl
@@ -34,7 +34,7 @@ end
 
 # Although `if_else()` is convenient when evaluating a single condition, it can be cumbersome when evaluating multiple conditions because subsequent conditions need to be nested within the `no` condition for the preceding argument. For situations where multiple conditions need to be evaluated, `case_when()` is more convenient.
 
-# Let's first consider a similar example from above and recreate it using `case_when()`. The following code creates a column `b` that assigns a value if 3 if `a >= 3` and otherwise leaves the value unchanged.
+# Let's first consider a similar example from above and recreate it using `case_when()`. The following code creates a column `b` that assigns a value of 3 if `a >= 3` and otherwise leaves the value unchanged.
 
 @chain df begin
   @mutate(b = case_when(a >= 3  =>  3,
@@ -72,4 +72,4 @@ end
 
 # ## Do these functions work outside of TidierData.jl?
 
-# Yes, both `if_else()` and `case_when()` work outside of TidierData.jl. However, you'll need to remember that if working with vectors, both the functions and conditions will need to be vectorized, and in the case of `case_when()`, the `=>` will need to be written as `.=>`. The reason this is not needed when using these functions inside of TidierData.jl is because they are auto-vectorized.
+# Yes, both `if_else()` and `case_when()` work outside of TidierData.jl. However, you'll need to remember that if working with vectors, both the functions and conditions will need to be vectorized, and in the case of `case_when()`, the `=>` will need to be written as `.=>`. This is not needed when using these functions inside of TidierData.jl because they are auto-vectorized there.
diff --git a/docs/examples/UserGuide/slice.jl b/docs/examples/UserGuide/slice.jl
@@ -64,7 +64,7 @@ end
 # ## Sample 5 random rows in the data frame
 
 @chain df begin
-  @slice_sample(5)
+  @slice_sample(n = 5)
 end
 
 # ## Slice the min
@@ -99,4 +99,4 @@ end
 
 @chain df begin 
   @slice_head(n = 3)
-end
+end
diff --git a/src/TidierData.jl b/src/TidierData.jl
@@ -28,7 +28,7 @@ const code = Ref{Bool}(false) # output DataFrames.jl code?
 const log = Ref{Bool}(false) # output tidylog output? (not yet implemented)
 
 # The global do-not-vectorize "list"
-const not_vectorized = Ref{Vector{Symbol}}([:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :∘, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical])
+const not_vectorized = Ref{Vector{Symbol}}([:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :∘, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :mad, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical, :unique, :iqr])
 
 # The global do-not-escape "list"
 # `in`, `∈`, and `∉` should be vectorized in auto-vec but not escaped
@@ -494,7 +494,17 @@ end
 $docstring_ungroup
 """
 macro ungroup(df)
-  :(DataFrame($(esc(df))))
+  df_expr = quote 
+    if $(esc(df)) isa GroupedDataFrame # grouped input: ungroup via `transform`, which keeps the original row order
+      transform($(esc(df)); ungroup = true)
+    else
+      copy($(esc(df))) # not grouped: hand back a copy of the input
+    end
+  end
+  if code[] # when the package-level `code` flag is set, log the generated expression
+    @info MacroTools.prettify(df_expr)
+  end
+  return df_expr
 end
 
 """
@@ -542,7 +552,7 @@ macro distinct(df, exprs...)
       # because if the original DataFrame is grouped, it must be ungrouped
       # and then regrouped, so there's no need to make a copy up front.
       # This is because `unique()` does not work on GroupDataFrames.
-      local df_copy = DataFrame($(esc(df)))
+      local df_copy = transform($(esc(df)); ungroup = true)
       if $any_found_n
         transform!(df_copy, nrow => :TidierData_n)
       end

diff --git a/src/docstrings.jl b/src/docstrings.jl
@@ -1320,14 +1320,15 @@ julia> @semi_join(df1, df2, "a" = "a")
 
 const docstring_pivot_wider =
 """
-   @pivot_wider(df, names_from, values_from)
+   @pivot_wider(df, names_from, values_from[, values_fill])
 
 Reshapes the DataFrame to make it wider, increasing the number of columns and reducing the number of rows.
 
 # Arguments
 - `df`: A DataFrame.
 - `names_from`: The name of the column to get the name of the output columns from.
 - `values_from`: The name of the column to get the cell values from.
+- `values_fill`: The value used to fill in when a name/value combination is missing (default is `missing`).
 
 # Examples
 ```jldoctest
@@ -3409,4 +3410,4 @@ julia> @relocate(df, B:C) # bring columns to the front
    4 │     9  D           4  B         4  D
    5 │    10  E           5  C         5  E
 ```
-"""
+"""
diff --git a/src/slice.jl b/src/slice.jl
@@ -64,7 +64,7 @@ macro slice_sample(df, exprs...)
                 as_integer(floor(n() * $expr_dict[:prop]));
                 replace=$replace))
     else
-      @slice($(esc(df)), sample(1:n(), 1; replace=$replace))
+      throw("Please provide either an `n` or a `prop` value as a keyword argument.")
     end
   end