Skip to content

Commit

Permalink
Merge pull request #15 from cecoeco/main
Browse files Browse the repository at this point in the history
  • Loading branch information
drizk1 committed May 16, 2024
2 parents 4dee40e + d700f25 commit e7e0004
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 6 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ TidierStrings.jl currently supports:
- `str_trim()`
- `str_subset()`
- `str_unique()`
- `str_starts()`
- `str_ends()`
- `str_which()`
- `word()`

## Examples
Expand Down
3 changes: 3 additions & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@ This package includes:
- `str_trim()`
- `str_subset()`
- `str_unique()`
- `str_starts()`
- `str_ends()`
- `str_which()`
- `word()`
51 changes: 50 additions & 1 deletion src/TidierStrings.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module TidierStrings

export str_detect, str_replace, str_replace_all, str_remove_all, str_remove, str_count, str_squish, str_equal, str_to_upper, str_to_lower, str_split, str_subset,
str_to_title, str_to_sentence, str_dup, str_length, str_width, str_trim, str_unique, word
str_to_title, str_to_sentence, str_dup, str_length, str_width, str_trim, str_unique, word, str_starts, str_ends, str_which

include("strings_docstrings.jl")

Expand Down Expand Up @@ -35,6 +35,55 @@ function str_detect(column, pattern::Union{String, Regex})
end
end

"""
$docstring_str_starts
"""
function str_starts(string::Vector{T}, pattern::Union{AbstractString,Regex}; negate::Bool=false)::Vector{Bool} where {T}
    # Element-wise prefix test: one Bool per element of `string`, `true` when the
    # element begins with `pattern`. With `negate=true` every result is inverted.
    #
    # `startswith` accepts both a plain string and a Regex prefix. For a Regex it
    # anchors the match at the first character, so `r"p"` and `r"^p"` behave the
    # same here. The previous `match(pattern, s) !== nothing` check matched the
    # regex anywhere in the string, which reported e.g. "apple" as starting with
    # r"p".
    matches = Bool[startswith(s, pattern) for s in string]
    return negate ? .!matches : matches
end

"""
$docstring_str_ends
"""
function str_ends(string::Vector{T}, pattern::Union{AbstractString,Regex}; negate::Bool=false)::Vector{Bool} where {T}
    # Element-wise suffix test: one Bool per element of `string`, `true` when the
    # element finishes with `pattern`. With `negate=true` every result is inverted.
    #
    # `endswith` accepts both a plain string and a Regex suffix. For a Regex it
    # anchors the match at the end of the string, so `r"e"` and `r"e\$"` behave
    # the same here. The previous `match(pattern, s) !== nothing` check matched
    # the regex anywhere in the string, which reported e.g. "banana" as ending
    # with r"n".
    matches = Bool[endswith(s, pattern) for s in string]
    return negate ? .!matches : matches
end

"""
$docstring_str_which
"""
function str_which(strings::Vector{T}, pattern::Union{AbstractString, Regex}; negate::Bool=false)::Vector{Int} where {T}
    # Return the 1-based positions of the elements of `strings` that contain
    # `pattern`; with `negate=true`, return the positions that do not.
    #
    # `occursin` handles both string and Regex patterns, so no branching on the
    # pattern type is needed. `missing` elements never count as a match for
    # either pattern kind (previously only the string branch guarded against
    # them, so a Regex pattern raised on `missing`). As before, a `missing`
    # element is therefore reported among the non-matches when `negate=true`.
    hits = Bool[!ismissing(s) && occursin(pattern, s) for s in strings]
    return findall(negate ? .!hits : hits)
end

"""
$docstring_str_replace
Expand Down
107 changes: 102 additions & 5 deletions src/strings_docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ julia> str_to_sentence("hello world!")
"Hello world!"
julia> str_to_sentence("a sentence mUst starT With A capital letter.")
"A sentence must start With a capital letter."
"A sentence must start with a capital letter."
```
"""

const docstring_str_dup =
Expand All @@ -317,6 +318,7 @@ Examples
```jldoctest
julia> str_dup("hello", 3)
"hellohellohello"
```
"""

const docstring_str_length =
Expand Down Expand Up @@ -360,6 +362,7 @@ julia> str_width("hello world! 😊")
julia> str_width("😊")
2
```
"""

const docstring_str_trim =
Expand All @@ -379,6 +382,7 @@ Examples
```jldoctest
julia> str_trim(" hello world! 😊 ")
"hello world! 😊"
```
"""

const docstring_str_unique =
Expand All @@ -402,6 +406,7 @@ julia> str_unique(["apple", "banana", "pear", "banana", "Apple"])
"banana"
"pear"
"Apple"
```
"""

const docstring_word =
Expand All @@ -422,20 +427,112 @@ The extracted word from the string.
Examples
```jldoctest
julia> word("Jane saw a cat", 1)
1-element Vector{String}:
1-element Vector{SubString{String}}:
"Jane"
julia> word("Jane saw a cat", 2)
1-element Vector{String}:
1-element Vector{SubString{String}}:
"saw"
julia> word("Jane saw a cat", -1)
1-element Vector{String}:
1-element Vector{SubString{String}}:
"cat"
julia> word("Jane saw a cat", 2, -1)
3-element Vector{String}:
3-element Vector{SubString{String}}:
"saw"
"a"
"cat"
```
"""

const docstring_str_starts =
"""
str_starts(string::Vector{T}, pattern::Union{AbstractString, Regex}; negate::Bool=false)
Check if a string starts with a certain pattern.
Arguments
- `string`: Input vector of strings.
- `pattern`: The pattern to check for. Can be a string or a regular expression.
- `negate`: Whether to negate the result. Default is `false`.
Returns
A vector of booleans indicating if the string starts with the pattern.
Examples
```jldoctest
julia> str_starts(["apple", "banana", "pear", "pineapple"], r"^p") # [false, false, true, true]
4-element Vector{Bool}:
0
0
1
1
julia> str_starts(["apple", "banana", "pear", "pineapple"], r"^p", negate=true) # [true, true, false, false]
4-element Vector{Bool}:
1
1
0
0
```
"""

const docstring_str_ends =
"""
str_ends(string::Vector{T}, pattern::Union{AbstractString, Regex}; negate::Bool=false)
Check if a string ends with a certain pattern.
Arguments
- `string`: Input vector of strings.
- `pattern`: The pattern to check for. Can be a string or a regular expression.
- `negate`: Whether to negate the result. Default is `false`.
Returns
A vector of booleans indicating if the string ends with the pattern.
Examples
```jldoctest
julia> str_ends(["apple", "banana", "pear", "pineapple"], r"e\$") # [true, false, false, true]
4-element Vector{Bool}:
1
0
0
1
julia> str_ends(["apple", "banana", "pear", "pineapple"], r"e\$", negate=true) # [false, true, true, false]
4-element Vector{Bool}:
0
1
1
0
```
"""

const docstring_str_which =
"""
str_which(strings::Vector{T}, pattern::Union{AbstractString, Regex}; negate::Bool=false)
Returns the indices of strings where there's at least one match to the pattern.
# Arguments
- `strings`: Input vector of strings.
- `pattern`: The pattern to check for. Can be a string or a regular expression.
- `negate`: Whether to negate the result. Default is `false`.
# Returns
An integer vector containing indices of matching strings.
# Examples
```jldoctest
julia> str_which(["apple", "banana", "pear", "pineapple"], r"a") # [1, 2, 3, 4]
4-element Vector{Int64}:
1
2
3
4
julia> str_which(["apple", "banana", "pear", "pineapple"], r"a", negate=true) # []
Int64[]
julia> str_which(["apple", "banana", "pear", "pineapple"], "a", negate=true) # []
Int64[]
```
"""

0 comments on commit e7e0004

Please sign in to comment.