From 51748ab592d808334753cde903ca0d58f50d4c38 Mon Sep 17 00:00:00 2001 From: webofceco Date: Sun, 12 May 2024 16:26:14 -0400 Subject: [PATCH 1/5] new functions: str_unique(), str_escape(), word() --- README.md | 3 ++ docs/src/index.md | 5 +++- src/TidierStrings.jl | 49 ++++++++++++++++++++++++++++++++ src/strings_docstrings.jl | 60 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 814db6a..964ffc0 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,9 @@ TidierStrings.jl currently supports: - `str_width()` - `str_trim()` - `str_subset()` +- `str_unique()` +- `str_escape()` +- `word()` ## Examples diff --git a/docs/src/index.md b/docs/src/index.md index 6bb2ea2..f222c0e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -23,4 +23,7 @@ This package includes: - `str_to_length()` - `str_to_width()` - `str_to_trim()` -- `str_subset()` \ No newline at end of file +- `str_subset()` +- `str_unique()` +- `str_escape()` +- `word()` diff --git a/src/TidierStrings.jl b/src/TidierStrings.jl index 5971784..2d2e0fb 100644 --- a/src/TidierStrings.jl +++ b/src/TidierStrings.jl @@ -321,6 +321,55 @@ function str_trim(s::AbstractString, side::String="both") end end +""" +$docstring_str_escape +""" +function str_escape(string::AbstractString) + if ismissing(string) + return(string) + end + + metacharacters = ['\\', '.', '*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$'] + escaped_string = join([c == '\\' ? "\\\\" : c in metacharacters ? 
"\\$c" : c for c in string], "") + + return escaped_string +end + +""" +$docstring_str_unique +""" +function str_unique(strings::AbstractVector{<:AbstractString}; ignore_case::Bool=false) + if ismissing(strings) + return(strings) + end + + unique_strings = unique(strings) + if ignore_case + unique_strings = unique(lowercase, strings) # keep the first occurrence of each case-insensitive group + end + return unique_strings +end + +""" +$docstring_word +""" +function word(string::AbstractString, start_index::Int=1, end_index::Int=start_index, sep::AbstractString=" ") + if ismissing(string) + return(string) + end + + words = split(string, sep) + + if start_index < 0 + start_index = length(words) + start_index + 1 + end + + if end_index < 0 + end_index = length(words) + end_index + 1 + end + + return words[start_index:end_index] +end """ $docstring_str_subset diff --git a/src/strings_docstrings.jl b/src/strings_docstrings.jl index 81cab99..140c9c6 100644 --- a/src/strings_docstrings.jl +++ b/src/strings_docstrings.jl @@ -380,3 +380,63 @@ Examples julia> str_trim(" hello world! 😊 ") "hello world! 😊" """ + +const docstring_str_escape = +""" + str_escape(string::AbstractString) + +Escape special characters in the string `string`. + +Arguments +- `string`: Input string. + +Returns +The string `string` with special characters escaped. + +Examples +```jldoctest +julia> str_escape("") +``` +""" + +const docstring_str_unique = +""" + str_unique(strings::AbstractVector{<:AbstractString}; ignore_case::Bool=false) + +Remove duplicates from a vector of strings. + +Arguments +- `strings`: Input vector of strings. +- `ignore_case`: Whether to ignore case when comparing strings. Default is `false`. + +Returns +A vector of unique strings from the input vector. 
+ +Examples +```jldoctest +julia> str_unique(["hello", "world", "hello"]) +2-element Array{String,1}: + "hello" + "world" +""" + +const docstring_word = +""" + word(string::AbstractString, start_index::Int=1, end_index::Int=start_index, sep::AbstractString=" ") + +Extract a word from a string. + +Arguments +- `string`: Input string. +- `start_index`: Position of the first word to extract; negative values count from the end. Default is 1. +- `end_index`: Position of the last word to extract; negative values count from the end. Default is `start_index`. +- `sep`: The separator used to split `string` into words. Default is a space. + +Returns +The extracted word from the string. + +Examples +```jldoctest +julia> word("hello world!", 6, 10) +"world" +""" \ No newline at end of file From 6e6d4ba3e0dfb200f21449ef9bc01947d6fd0200 Mon Sep 17 00:00:00 2001 From: webofceco Date: Sun, 12 May 2024 16:30:06 -0400 Subject: [PATCH 2/5] export functions --- src/TidierStrings.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TidierStrings.jl b/src/TidierStrings.jl index 2d2e0fb..41e56b5 100644 --- a/src/TidierStrings.jl +++ b/src/TidierStrings.jl @@ -1,7 +1,7 @@ module TidierStrings export str_detect, str_replace, str_replace_all, str_remove_all, str_remove, str_count, str_squish, str_equal, str_to_upper, str_to_lower, str_split, str_subset, - str_to_title, str_to_sentence, str_dup, str_length, str_width, str_trim + str_to_title, str_to_sentence, str_dup, str_length, str_width, str_trim, str_unique, str_escape, word include("strings_docstrings.jl") From 58cb9e5f26ed5493e8305e5476eee10a3a3f8480 Mon Sep 17 00:00:00 2001 From: webofceco Date: Sun, 12 May 2024 16:36:09 -0400 Subject: [PATCH 3/5] fixed docstrings --- src/strings_docstrings.jl | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/strings_docstrings.jl b/src/strings_docstrings.jl index 140c9c6..ec33fa1 100644 --- a/src/strings_docstrings.jl +++ b/src/strings_docstrings.jl @@ -395,7 +395,8 @@ The string `string` with special characters 
escaped. Examples ```jldoctest -julia> str_escape("") +julia> str_escape("This is a string with special characters: \\ .* + ? | ( ) [ ] { } ^ \$") +"This is a string with special characters: \\\\ \\. \\* \\+ \\? \\| \\( \\) \\[ \\] \\{ \\} \\^ \\$" ``` """ @@ -414,10 +415,12 @@ A vector of unique strings from the input vector. Examples ```jldoctest -julia> str_unique(["hello", "world", "hello"]) -2-element Array{String,1}: - "hello" - "world" +julia> str_unique(["apple", "banana", "pear", "banana", "Apple"]) +4-element Vector{String}: + "apple" + "banana" + "pear" + "Apple" """ const docstring_word = @@ -437,6 +440,21 @@ The extracted word from the string. Examples ```jldoctest -julia> word("hello world!", 6, 10) -"world" +julia> word("Jane saw a cat", 1) +1-element Vector{SubString{String}}: + "Jane" + +julia> word("Jane saw a cat", 2) +1-element Vector{SubString{String}}: + "saw" + +julia> word("Jane saw a cat", -1) +1-element Vector{SubString{String}}: + "cat" + +julia> word("Jane saw a cat", 2, -1) +3-element Vector{SubString{String}}: + "saw" + "a" + "cat" """ \ No newline at end of file From 78a14f5a374bc90bf9d5bd72398d0f997e7a9106 Mon Sep 17 00:00:00 2001 From: webofceco Date: Sun, 12 May 2024 16:39:03 -0400 Subject: [PATCH 4/5] removed $ from str_escape --- src/TidierStrings.jl | 2 +- src/strings_docstrings.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/TidierStrings.jl b/src/TidierStrings.jl index 41e56b5..55c0d15 100644 --- a/src/TidierStrings.jl +++ b/src/TidierStrings.jl @@ -329,7 +329,7 @@ function str_escape(string::AbstractString) return(string) end - metacharacters = ['\\', '.', '*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$'] + metacharacters = ['\\', '.', '*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^'] escaped_string = join([c == '\\' ? "\\\\" : c in metacharacters ? 
"\\$c" : c for c in string], "") return escaped_string diff --git a/src/strings_docstrings.jl b/src/strings_docstrings.jl index ec33fa1..d7c1123 100644 --- a/src/strings_docstrings.jl +++ b/src/strings_docstrings.jl @@ -395,8 +395,8 @@ The string `string` with special characters escaped. Examples ```jldoctest -julia> str_escape("This is a string with special characters: \\ .* + ? | ( ) [ ] { } ^ \$") -"This is a string with special characters: \\\\ \\. \\* \\+ \\? \\| \\( \\) \\[ \\] \\{ \\} \\^ \\$" +julia> str_escape("This is a string with special characters: \\ .* + ? | ( ) [ ] { } ^") +"This is a string with special characters: \\\\ \\. \\* \\+ \\? \\| \\( \\) \\[ \\] \\{ \\} \\^" ``` """ From d0dcff3bdb98b9795e06f62de0bb56268de0685b Mon Sep 17 00:00:00 2001 From: webofceco Date: Sun, 12 May 2024 16:46:12 -0400 Subject: [PATCH 5/5] removed str_escape() for now --- README.md | 1 - docs/src/index.md | 1 - src/TidierStrings.jl | 16 +--------------- src/strings_docstrings.jl | 19 ------------------- 4 files changed, 1 insertion(+), 36 deletions(-) diff --git a/README.md b/README.md index 964ffc0..e48e402 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,6 @@ TidierStrings.jl currently supports: - `str_trim()` - `str_subset()` - `str_unique()` -- `str_escape()` - `word()` ## Examples diff --git a/docs/src/index.md b/docs/src/index.md index f222c0e..82a5b19 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -25,5 +25,4 @@ This package includes: - `str_to_trim()` - `str_subset()` - `str_unique()` -- `str_escape()` - `word()` diff --git a/src/TidierStrings.jl b/src/TidierStrings.jl index 55c0d15..78cfc2a 100644 --- a/src/TidierStrings.jl +++ b/src/TidierStrings.jl @@ -1,7 +1,7 @@ module TidierStrings export str_detect, str_replace, str_replace_all, str_remove_all, str_remove, str_count, str_squish, str_equal, str_to_upper, str_to_lower, str_split, str_subset, - str_to_title, str_to_sentence, str_dup, str_length, str_width, str_trim, str_unique, str_escape, 
word + str_to_title, str_to_sentence, str_dup, str_length, str_width, str_trim, str_unique, word include("strings_docstrings.jl") @@ -321,20 +321,6 @@ function str_trim(s::AbstractString, side::String="both") end end -""" -$docstring_str_escape -""" -function str_escape(string::AbstractString) - if ismissing(string) - return(string) - end - - metacharacters = ['\\', '.', '*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^'] - escaped_string = join([c == '\\' ? "\\\\" : c in metacharacters ? "\\$c" : c for c in string], "") - - return escaped_string -end - """ $docstring_str_unique """ diff --git a/src/strings_docstrings.jl b/src/strings_docstrings.jl index d7c1123..d28797f 100644 --- a/src/strings_docstrings.jl +++ b/src/strings_docstrings.jl @@ -381,25 +381,6 @@ julia> str_trim(" hello world! 😊 ") "hello world! 😊" """ -const docstring_str_escape = -""" - str_escape(string::AbstractString) - -Escape special characters in the string `string`. - -Arguments -- `string`: Input string. - -Returns -The string `string` with special characters escaped. - -Examples -```jldoctest -julia> str_escape("This is a string with special characters: \\ .* + ? | ( ) [ ] { } ^") -"This is a string with special characters: \\\\ \\. \\* \\+ \\? \\| \\( \\) \\[ \\] \\{ \\} \\^" -``` -""" - const docstring_str_unique = """ str_unique(strings::AbstractVector{<:AbstractString}; ignore_case::Bool=false)