# `str_starts`, `str_ends` and `str_which`, exported from the TidierStrings module.
# The docstring constants are defined first because each function's docstring
# interpolates its `docstring_*` constant when the function is defined.

const docstring_str_starts =
"""
    str_starts(string::AbstractVector, pattern::Union{AbstractString, Regex}; negate::Bool=false)

Check whether each string in a vector starts with a pattern.

# Arguments
- `string`: Vector of input strings.
- `pattern`: The prefix to look for, either a literal string or a regular expression.
  A regular expression is anchored to the start of each string, so a leading `^` is
  optional.
- `negate`: If `true`, flag the strings that do not start with the pattern.
  Default is `false`.

# Returns
A `Vector{Bool}` with one entry per input string.

# Examples
```jldoctest
julia> str_starts(["apple", "banana", "pear", "pineapple"], r"^p")
4-element Vector{Bool}:
 0
 0
 1
 1

julia> str_starts(["apple", "banana", "pear", "pineapple"], r"p")
4-element Vector{Bool}:
 0
 0
 1
 1

julia> str_starts(["apple", "banana", "pear", "pineapple"], "p", negate=true)
4-element Vector{Bool}:
 1
 1
 0
 0
```
"""

const docstring_str_ends =
"""
    str_ends(string::AbstractVector, pattern::Union{AbstractString, Regex}; negate::Bool=false)

Check whether each string in a vector ends with a pattern.

# Arguments
- `string`: Vector of input strings.
- `pattern`: The suffix to look for, either a literal string or a regular expression.
  A regular expression is anchored to the end of each string, so a trailing `\$` is
  optional.
- `negate`: If `true`, flag the strings that do not end with the pattern.
  Default is `false`.

# Returns
A `Vector{Bool}` with one entry per input string.

# Examples
```jldoctest
julia> str_ends(["apple", "banana", "pear", "pineapple"], r"e\$")
4-element Vector{Bool}:
 1
 0
 0
 1

julia> str_ends(["apple", "banana", "pear", "pineapple"], r"a")
4-element Vector{Bool}:
 0
 1
 0
 0

julia> str_ends(["apple", "banana", "pear", "pineapple"], "e", negate=true)
4-element Vector{Bool}:
 0
 1
 1
 0
```
"""

const docstring_str_which =
"""
    str_which(strings::AbstractVector, pattern::Union{AbstractString, Regex}; negate::Bool=false)

Return the indices of the strings that contain at least one match of the pattern.

# Arguments
- `strings`: Vector of input strings. `missing` entries are allowed and are never
  reported, whether or not `negate` is set.
- `pattern`: The pattern to search for, either a literal string or a regular expression.
- `negate`: If `true`, return the indices of the strings that do not contain the pattern.
  Default is `false`.

# Returns
A `Vector{Int}` with the indices of the selected strings, in increasing order.

# Examples
```jldoctest
julia> str_which(["apple", "banana", "pear", "pineapple"], r"a")
4-element Vector{Int64}:
 1
 2
 3
 4

julia> str_which(["apple", "banana", "pear", "pineapple"], "pp")
2-element Vector{Int64}:
 1
 4

julia> str_which(["apple", "banana", "pear", "pineapple"], r"a", negate=true)
Int64[]

julia> str_which(["apple", missing, "pear"], "pp", negate=true)
1-element Vector{Int64}:
 3
```
"""

"""
$docstring_str_starts
"""
function str_starts(
    string::AbstractVector, pattern::Union{AbstractString,Regex}; negate::Bool=false
)::Vector{Bool}
    # `startswith` has methods for both literal and `Regex` prefixes, and the `Regex`
    # method anchors the match at the start of the string. A plain `match` would accept
    # the pattern anywhere in the string.
    return Bool[xor(startswith(s, pattern), negate) for s in string]
end

"""
$docstring_str_ends
"""
function str_ends(
    string::AbstractVector, pattern::Union{AbstractString,Regex}; negate::Bool=false
)::Vector{Bool}
    # `endswith` anchors a `Regex` suffix at the end of the string, mirroring
    # `str_starts`.
    return Bool[xor(endswith(s, pattern), negate) for s in string]
end

"""
$docstring_str_which
"""
function str_which(
    strings::AbstractVector, pattern::Union{AbstractString,Regex}; negate::Bool=false
)::Vector{Int}
    # `missing` is neither a match nor a non-match, so it is filtered out before
    # `occursin` is called; this also keeps the `Regex` path from throwing on `missing`.
    return findall(strings) do s
        !ismissing(s) && xor(occursin(pattern, s), negate)
    end
end