Minor cleanup, bump version to 0.14.1.

TidierOrg · Dec 19, 2023 · fbd4783 · fbd4783
1 parent 471e382
commit fbd4783
Show file tree

Hide file tree

Showing 7 changed files with 157 additions and 144 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,9 @@
 # TidierData.jl updates
 
+## v0.14.1 - 2023-12-19
+- `@separate()` now supports regular expressions
+- Adds `@separate_rows()`
+
 ## v0.14.0 - 2023-12-12
 - Update parsing engine so that non-function reserved names from the Base and Core modules (like `missing`, `pi`, and `Real`) are auto-escaped now, with the exception of names in the not_escaped[] array, which are never escaped
 - Add `collect()` to not_vectorized[] array

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "TidierData"
 uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
 authors = ["Karandeep Singh"]
-version = "0.14.0"
+version = "0.14.1"
 
 [deps]
 Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"

diff --git a/README.md b/README.md
@@ -91,7 +91,7 @@ TidierData.jl currently supports the following top-level macros:
 - `@left_join()`, `@right_join()`, `@inner_join()`, `@full_join()`, `@anti_join()`, and `@semi_join()`
 - `@bind_rows()` and `@bind_cols()`
 - `@pivot_wider()` and `@pivot_longer()`
-- `@separate()` and `@unite()`
+- `@separate()`, `@separate_rows()`, and `@unite()`
 - `@drop_missing()` and `@fill_missing()`
 - `@clean_names()` (as in R's `janitor::clean_names()` function)
 - `@summary()` (as in R's `summary()` function)

diff --git a/docs/examples/UserGuide/sep_unite.jl b/docs/examples/UserGuide/sep_unite.jl
@@ -4,38 +4,46 @@ using TidierData
 
 df = DataFrame(a = ["1-1", "2-2", "3-3-3"]);
 
-# ## @separate
+# ## `@separate`
 
 # Separate the "a" column into "b", "c", and "d" columns based on the dash delimiter
 
 @chain df begin
     @separate(a, (b, c, d), "-")
 end
 
-# The into columns can also be designated as follows
+# The `into` columns can also be designated as follows:
 
 new_names = ["x$(i)" for i in 1:3]; # or new_names = ["b", "c", "d"], or new_names = [:b, :c, :d]
 
 @separate(df, a, !!new_names, "-")
 
-# ## @unite
+# ## `@unite`
 
 # The `@unite` macro brings together multiple columns into one, separating the values by a user-specified delimiter.
 # Here, the `@unite` macro combines the "b", "c", and "d" columns into a single new "new_col" column using the "/" delimiter.
 
 
-df = DataFrame( b = ["1", "2", "3"], c = ["1", "2", "3"], d = [missing, missing, "3"]);
+df = DataFrame(
+       b = ["1", "2", "3"],
+       c = ["1", "2", "3"],
+       d = [missing, missing, "3"]);
 
 @chain df begin
     @unite(new_col, (b, c, d), "/")
 end
 
 
-# @separate_rows 
+# ## `@separate_rows` 
 
-# ## Separate rows into multiple rows based on a chosen delimiter.
+# Separate rows into multiple rows based on a chosen delimiter.
 
-df = DataFrame(a = 1:3, b = ["a", "aa;bb;cc", "dd;ee"], c = ["1", "2;3;4", "5;6"], d = ["7", "8;9;10", "11;12"], e = ["11", "22;33;44", "55;66"]);
+df = DataFrame(
+       a = 1:3,
+       b = ["a", "aa;bb;cc", "dd;ee"],
+       c = ["1", "2;3;4", "5;6"],
+       d = ["7", "8;9;10", "11;12"],
+       e = ["11", "22;33;44", "55;66"]);
 
-@separate_rows(df, b:5, ";")
+@separate_rows(df, b:e, ";")
 
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -103,7 +103,7 @@ TidierData.jl currently supports the following top-level macros:
     - `@left_join()`, `@right_join()`, `@inner_join()`, `@full_join()`, `@anti_join()`, and `@semi_join()`
     - `@bind_rows()` and `@bind_cols()`
     - `@pivot_wider()` and `@pivot_longer()`
-    - `@separate()` and `@unite()`
+    - `@separate()`, `@separate_rows()`, and `@unite()`
     - `@drop_missing()` and `@fill_missing()`
     - `@clean_names()` (as in R's `janitor::clean_names()` function)
     - `@summary()` (as in R's `summary()` function)

diff --git a/src/docstrings.jl b/src/docstrings.jl
@@ -2912,18 +2912,21 @@ julia> @rename_with(df, str -> str_remove_all(str, "_a"), !term_a)
 
 const docstring_separate_rows =
 """
-    separate_rows(df, column(s), delimiter)
+    separate_rows(df, columns..., delimiter)
 
 Split the contents of specified columns in a DataFrame into multiple rows based on a given delimiter.
 
 # Arguments
 - `df`: A DataFrame
-- `columns`: A column or collection of columns to be split. Can be a mix of integers  and symbols
+- `columns`: A column or multiple columns to be split. Can be a mix of integers and column names.
 - `delimiter`: The string or character or regular expression used to split the column values.
 
 # Examples
 ```jldoctest
-julia> df = DataFrame(a = 1:3, b = ["a", "aa;bb;cc", "dd;ee"], c = ["1", "2;3;4", "5;6"], d = ["7", "8;9;10", "11;12"])
+julia> df = DataFrame(a = 1:3,
+                      b = ["a", "aa;bb;cc", "dd;ee"],
+                      c = ["1", "2;3;4", "5;6"],
+                      d = ["7", "8;9;10", "11;12"])
 3×4 DataFrame
  Row │ a      b         c       d      
      │ Int64  String    String  String