Skip to content

Commit

Permalink
replace_missing and missing_if
Browse files Browse the repository at this point in the history
  • Loading branch information
drizk1 committed Oct 3, 2023
1 parent 97d0955 commit 792dea0
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 11 deletions.
15 changes: 15 additions & 0 deletions docs/examples/UserGuide/fill_missing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,18 @@ end
@fill_missing(a, "down")
end

# ## `replace_missing()`
# The `replace_missing` function replaces `missing` values with a specified replacement value. In the example below, each `missing` value in column `b` becomes `2`.

@chain df begin
@mutate(b = replace_missing(b, 2))
end

# ## `missing_if()`
# The `missing_if` function introduces `missing` values: every value equal to the one supplied is replaced with `missing`. In the example below, each `5` in column `b` becomes `missing`.

@chain df begin
@mutate(b = missing_if(b, 5))
end

# Neither `missing_if` nor `replace_missing` is type-specific: both accept values of any type.
2 changes: 1 addition & 1 deletion src/TidierData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ using Reexport
@reexport using ShiftedArrays: lag, lead

# Public API of the package. Each name is listed exactly once; `missing_if` and
# `replace_missing` are the helpers added alongside the existing type-conversion functions.
export TidierData_set, across, desc, n, row_number, everything, starts_with, ends_with,
    matches, if_else, case_when, ntile, as_float, as_integer, as_string, is_float,
    is_integer, is_string, missing_if, replace_missing, @select, @transmute, @rename,
    @mutate, @summarize, @summarise, @filter, @group_by, @ungroup, @slice, @arrange,
    @distinct, @pull, @left_join, @right_join, @inner_join, @full_join, @pivot_wider,
    @pivot_longer, @bind_rows, @bind_cols, @clean_names, @count, @tally, @drop_missing,
    @glimpse, @separate, @unite, @summary, @fill_missing, @slice_sample
Expand Down
56 changes: 56 additions & 0 deletions src/docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2441,3 +2441,59 @@ julia> @chain df begin
5 │ 25 5 15
```
"""

# Docstring text for `missing_if`; it is interpolated into the docstring placed directly
# above the function definition.
const docstring_missing_if =
"""
    missing_if(x, value)

Replace a specific `value` with `missing` in `x`.

# Arguments
- `x`: The input value which can be of any type. If `x` is already `missing` or equals `value`, the function will return `missing`. Otherwise, it returns `x` unaltered.
- `value`: The specific value to be checked against.

# Examples
```jldoctest
julia> df = DataFrame(a = [1, missing, 3, 4], b = ["apple", "apple", "banana", "cherry"]);

julia> @chain df begin
       @mutate(a = missing_if(a, 4), b = missing_if(b, "apple"))
       end
4×2 DataFrame
 Row │ a        b
     │ Int64?   String?
─────┼──────────────────
   1 │       1  missing
   2 │ missing  missing
   3 │       3  banana
   4 │ missing  cherry
```
"""

# Docstring text for `replace_missing`; it is interpolated into the docstring placed
# directly above the function definition.
const docstring_replace_missing =
"""
    replace_missing(x, replacement)

Replace `missing` values in `x` with a specified `replacement` value.

# Arguments
- `x`: The input value which can be of any type. If `x` is `missing`, the function will return `replacement`. Otherwise, it returns `x` unaltered.
- `replacement`: The value to replace `missing` with in `x`.

# Examples
```jldoctest
julia> df = DataFrame(a = [1, missing, 3, 4], b = [4, 5, missing, 8]);

julia> @chain df begin
       @mutate(a = replace_missing(a, 100), b = replace_missing(b, 35))
       end
4×2 DataFrame
 Row │ a      b
     │ Int64  Int64
─────┼──────────────
   1 │     1      4
   2 │   100      5
   3 │     3     35
   4 │     4      8
```
"""
12 changes: 11 additions & 1 deletion src/missings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,14 @@ macro fill_missing(df, args...)
fill_missing($(esc(df)), [$(cols_quoted...)], $method)
end
end
end
end

"""
$docstring_missing_if
"""
function missing_if(x, value)
    # `x == value` evaluates to `missing` (not a `Bool`) whenever either operand is
    # `missing`. Treating that as "no match" leaves a `missing` input untouched and
    # avoids a `TypeError` when `value` itself is `missing`.
    return coalesce(x == value, false) ? missing : x
end

"""
$docstring_replace_missing
"""
function replace_missing(x, replacement)
    # Anything other than `missing` passes through unchanged.
    ismissing(x) || return x
    return replacement
end
28 changes: 19 additions & 9 deletions src/separate_unite.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,24 @@ end
$docstring_unite
"""
macro unite(df, new_col, from_cols, sep)
    # Wrap the `new_col` argument in a `QuoteNode` so it is spliced into the generated
    # call as a literal value instead of being evaluated as a variable.
    new_col_quoted = QuoteNode(new_col)
    # NOTE(review): `parse_interpolation` is defined elsewhere; only the first of its
    # three return values is used here. Presumably it resolves interpolated column
    # expressions -- confirm against its definition.
    interpolated_from_cols, _, _ = parse_interpolation(from_cols)

    if @capture(interpolated_from_cols, (args__,)) || @capture(interpolated_from_cols, [args__])
        # A literal tuple or vector of columns: quote every element at expansion time
        # and rebuild them as a vector literal.
        args = QuoteNode.(args)
        from_cols_expr = :[$(args...)]
    else
        # Any other expression is inspected when the generated code runs: a tuple is
        # converted to a vector of `Symbol`s, anything else is passed through as-is.
        from_cols_expr = quote
            if $interpolated_from_cols isa Tuple
                collect(Symbol.($interpolated_from_cols))
            else
                $interpolated_from_cols
            end
        end
    end

    return quote
        unite($(esc(df)), $new_col_quoted, $(from_cols_expr), $(esc(sep)))
    end
end

0 comments on commit 792dea0

Please sign in to comment.