diff --git a/README.md b/README.md
index 814db6a..e48e402 100644
--- a/README.md
+++ b/README.md
@@ -53,6 +53,8 @@ TidierStrings.jl currently supports:
 - `str_width()`
 - `str_trim()`
 - `str_subset()`
+- `str_unique()`
+- `word()`
 
 ## Examples
 
diff --git a/docs/src/index.md b/docs/src/index.md
index 6bb2ea2..82a5b19 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -23,4 +23,6 @@ This package includes:
 - `str_to_length()`
 - `str_to_width()`
 - `str_to_trim()`
-- `str_subset()`
\ No newline at end of file
+- `str_subset()`
+- `str_unique()`
+- `word()`
diff --git a/src/TidierStrings.jl b/src/TidierStrings.jl
index 5971784..78cfc2a 100644
--- a/src/TidierStrings.jl
+++ b/src/TidierStrings.jl
@@ -1,7 +1,7 @@
 module TidierStrings
 
 export str_detect, str_replace, str_replace_all, str_remove_all, str_remove, str_count, str_squish, str_equal, str_to_upper, str_to_lower, str_split, str_subset,
-       str_to_title, str_to_sentence, str_dup, str_length, str_width, str_trim
+       str_to_title, str_to_sentence, str_dup, str_length, str_width, str_trim, str_unique, word
 
 include("strings_docstrings.jl")
 
@@ -321,6 +321,44 @@ function str_trim(s::AbstractString, side::String="both")
     end
 end
 
+"""
+$docstring_str_unique
+"""
+function str_unique(
+    strings::Union{Missing,AbstractVector{<:AbstractString}}; ignore_case::Bool=false
+)
+    # `Missing` is admitted in the signature so that this pass-through is reachable.
+    ismissing(strings) && return strings
+
+    # `unique(f, itr)` keeps the first element seen for each distinct key, so comparing
+    # lowercased keys drops case-variant duplicates while preserving the spelling and
+    # order of first occurrences.
+    return ignore_case ? unique(lowercase, strings) : unique(strings)
+end
+
+"""
+$docstring_word
+"""
+function word(
+    string::Union{Missing,AbstractString},
+    start_index::Int=1,
+    end_index::Int=start_index,
+    sep::AbstractString=" ",
+)
+    # `Missing` is admitted in the signature so that this pass-through is reachable.
+    ismissing(string) && return string
+
+    words = split(string, sep)
+    n_words = length(words)
+
+    # Negative indices count back from the end: -1 is the last word.
+    first_word = start_index < 0 ? n_words + start_index + 1 : start_index
+    last_word = end_index < 0 ? n_words + end_index + 1 : end_index
+
+    # Copy the `SubString` views into `String`s so the result is a `Vector{String}`,
+    # matching the documented output.
+    return String.(words[first_word:last_word])
+end
 
 """
 $docstring_str_subset
diff --git a/src/strings_docstrings.jl b/src/strings_docstrings.jl
index 81cab99..d28797f 100644
--- a/src/strings_docstrings.jl
+++ b/src/strings_docstrings.jl
@@ -380,3 +380,70 @@ Examples
 julia> str_trim(" hello world! 😊 ")
 "hello world! 😊"
 """
+
+const docstring_str_unique =
+"""
+    str_unique(strings::AbstractVector{<:AbstractString}; ignore_case::Bool=false)
+
+Remove duplicates from a vector of strings, keeping the first occurrence of each.
+
+Arguments
+- `strings`: Input vector of strings. A `missing` input is returned unchanged.
+- `ignore_case`: Whether to ignore case when comparing strings. Default is `false`.
+
+Returns
+A vector of the unique strings from the input vector, in order of first appearance.
+
+Examples
+```jldoctest
+julia> str_unique(["apple", "banana", "pear", "banana", "Apple"])
+4-element Vector{String}:
+ "apple"
+ "banana"
+ "pear"
+ "Apple"
+
+julia> str_unique(["apple", "banana", "pear", "banana", "Apple"]; ignore_case=true)
+3-element Vector{String}:
+ "apple"
+ "banana"
+ "pear"
+```
+"""
+
+const docstring_word =
+"""
+    word(string::AbstractString, start_index::Int=1, end_index::Int=start_index, sep::AbstractString=" ")
+
+Extract words from a string by position.
+
+Arguments
+- `string`: Input string. A `missing` input is returned unchanged.
+- `start_index`: Position of the first word to extract. Negative values count back from the last word. Default is 1.
+- `end_index`: Position of the last word to extract. Negative values count back from the last word. Default is `start_index`.
+- `sep`: The separator used to split `string` into words. Default is a space.
+
+Returns
+A vector containing the words from `start_index` through `end_index`.
+
+Examples
+```jldoctest
+julia> word("Jane saw a cat", 1)
+1-element Vector{String}:
+ "Jane"
+
+julia> word("Jane saw a cat", 2)
+1-element Vector{String}:
+ "saw"
+
+julia> word("Jane saw a cat", -1)
+1-element Vector{String}:
+ "cat"
+
+julia> word("Jane saw a cat", 2, -1)
+3-element Vector{String}:
+ "saw"
+ "a"
+ "cat"
+```
+"""
\ No newline at end of file