Merge pull request #64 from drizk1/slice(n)-in-gdf

Slice(n()) in grouped df
TidierOrg · Nov 23, 2023 · a76103b · a76103b · kdpsingh · Nov 23, 2023
2 parents 5d863d3 + e9187db
commit a76103b
Show file tree

Hide file tree

Showing 4 changed files with 97 additions and 22 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,8 @@
 # TidierData.jl updates
 
+## v0.13.3 - 2023-11-23
+- `@slice()` now correctly handles `n()` in grouped data frames
+
 ## v0.13.2 - 2023-11-20
 - Adds `@anti_join()` and `@semi_join()`
 

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "TidierData"
 uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
 authors = ["Karandeep Singh"]
-version = "0.13.2"
+version = "0.13.3"
 
 [deps]
 Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"

diff --git a/src/docstrings.jl b/src/docstrings.jl
@@ -601,7 +601,7 @@ Select, remove or duplicate rows by indexing their integer positions.
 
 # Examples
 ```jldoctest 
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = repeat('a':'c', inner = 3), b = 1:9, c = 11:19);
 
 julia> @chain df begin
        @slice(1:5)
@@ -611,36 +611,80 @@ julia> @chain df begin
      │ Char  Int64  Int64 
 ─────┼────────────────────
    1 │ a         1     11
-   2 │ b         2     12
-   3 │ c         3     13
-   4 │ d         4     14
-   5 │ e         5     15
+   2 │ a         2     12
+   3 │ a         3     13
+   4 │ b         4     14
+   5 │ b         5     15
 
 julia> @chain df begin
        @slice(-(1:2))
        end
-3×3 DataFrame
+7×3 DataFrame
  Row │ a     b      c     
      │ Char  Int64  Int64 
 ─────┼────────────────────
-   1 │ c         3     13
-   2 │ d         4     14
-   3 │ e         5     15
+   1 │ a         3     13
+   2 │ b         4     14
+   3 │ b         5     15
+   4 │ b         6     16
+   5 │ c         7     17
+   6 │ c         8     18
+   7 │ c         9     19
 
 julia> @chain df begin
        @group_by(a)
        @slice(1)
        @ungroup
        end
-5×3 DataFrame
+3×3 DataFrame
  Row │ a     b      c     
      │ Char  Int64  Int64 
 ─────┼────────────────────
    1 │ a         1     11
-   2 │ b         2     12
-   3 │ c         3     13
-   4 │ d         4     14
-   5 │ e         5     15
+   2 │ b         4     14
+   3 │ c         7     17
+
+julia> @chain df begin
+       @group_by(a)
+       @slice(n())
+       @ungroup
+       end
+3×3 DataFrame
+ Row │ a     b      c     
+     │ Char  Int64  Int64 
+─────┼────────────────────
+   1 │ a         3     13
+   2 │ b         6     16
+   3 │ c         9     19
+
+julia> @chain df begin
+       @group_by(a)
+       @slice(-n())
+       @ungroup
+       end
+6×3 DataFrame
+ Row │ a     b      c     
+     │ Char  Int64  Int64 
+─────┼────────────────────
+   1 │ a         1     11
+   2 │ a         2     12
+   3 │ b         4     14
+   4 │ b         5     15
+   5 │ c         7     17
+   6 │ c         8     18
+
+julia> @chain df begin
+       @group_by(a)
+       @slice(-(2:n()))
+       @ungroup
+       end
+3×3 DataFrame
+ Row │ a     b      c     
+     │ Char  Int64  Int64 
+─────┼────────────────────
+   1 │ a         1     11
+   2 │ b         4     14
+   3 │ c         7     17
 ```         
 """
 

diff --git a/src/slice.jl b/src/slice.jl
@@ -18,20 +18,48 @@ macro slice(df, exprs...)
     if all(clean_indices .> 0)
       if $(esc(df)) isa GroupedDataFrame
         combine($(esc(df)); ungroup = false) do sdf
-          sdf[clean_indices, :]
-        end
-      else
+            local n_rows_group = nrow(sdf)
+            local interpolated_indices = parse_slice_n.($exprs, n_rows_group)
+            local original_indices = [eval.(interpolated_indices)...]
+            local clean_indices = Int64[]
+            for index in original_indices
+              if index isa Number
+                push!(clean_indices, index)
+              else
+                append!(clean_indices, collect(index))
+              end
+            end
+            clean_indices = filter(i -> i <= n_rows_group, clean_indices)
+            sdf[clean_indices, :]
+          end
+        else
         combine($(esc(df))) do sdf
           sdf[clean_indices, :]
         end
       end
     elseif all(clean_indices .< 0)
       clean_indices = -clean_indices
       if $(esc(df)) isa GroupedDataFrame
-        combine($(esc(df)); ungroup = true) do sdf
-          sdf[Not(clean_indices), :]
-        end
-      else
+        combine($(esc(df)); ungroup = false) do sdf
+            local n_rows_group = nrow(sdf)
+            local interpolated_indices = parse_slice_n.($exprs, n_rows_group)
+            local original_indices = [eval.(interpolated_indices)...]
+            local clean_indices = Int64[]
+            for index in original_indices
+              if index isa Number
+                # take the absolute value of index because the initial (outer) clean_indices are ignored here
+                # this needs to work for both -n() and -(1:n())
+                push!(clean_indices, abs(index))
+              else
+                # take the absolute value of each index because the initial (outer) clean_indices are ignored here
+                # this needs to work for both -n() and -(1:n())
+                append!(clean_indices, abs.(collect(index)))
+              end
+            end
+            clean_indices = filter(i -> i <= n_rows_group, clean_indices)
+            sdf[Not(clean_indices), :]
+          end
+        else
         combine($(esc(df))) do sdf
           sdf[Not(clean_indices), :]
         end