Skip to content

Commit

Permalink
remove regexp_extract_all
Browse files Browse the repository at this point in the history
  • Loading branch information
MrPowers committed Feb 10, 2024
1 parent 11fdf0b commit c2e581a
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 30 deletions.
2 changes: 0 additions & 2 deletions quinn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
exists,
forall,
multi_equals,
regexp_extract_all,
remove_all_whitespace,
remove_non_word_characters,
single_space,
Expand Down Expand Up @@ -75,7 +74,6 @@
"week_start_date",
"week_end_date",
"approx_equal",
"regexp_extract_all",
"business_days_between",
"uuid5",
"with_columns_renamed",
Expand Down
14 changes: 0 additions & 14 deletions quinn/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,20 +230,6 @@ def array_choice(col: Column, seed: int | None = None) -> Column:
return col[index]


@F.udf(returnType=ArrayType(StringType()))
def regexp_extract_all(s: Column, regexp: Column) -> Column:
    """Collect every match of a regular expression found in a string.

    The function is wrapped by ``F.udf`` with a declared return type of
    ``ArrayType(StringType())``. The body works on the values it is handed:
    ``s`` is compared against ``None`` and then passed, together with
    ``regexp``, to Python's ``re.findall``.

    :param s: the text to search; may be ``None``
    :param regexp: the ``re`` pattern to look for
    :return: the result of ``re.findall(regexp, s)``, or ``None`` when ``s`` is ``None``
    """
    # A missing input yields a missing result rather than an error from `re`.
    if s is None:
        return None
    return re.findall(regexp, s)


def business_days_between(
start_date: Column, end_date: Column, # noqa: ARG001
) -> Column:
Expand Down
14 changes: 0 additions & 14 deletions tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,20 +329,6 @@ def it_works_with_integer_values():
# chispa.assert_column_equality(actual_df, "random_letter", "expected")


def test_regexp_extract_all():
    """Check that ``quinn.regexp_extract_all`` matches the ``expected`` column."""
    # Each row pairs an input string with the list of digit runs expected from it;
    # the final row covers a null input, which is expected to give a null result.
    rows = [("200 - 300 PA.", ["200", "300"]), ("400 PA.", ["400"]), (None, None)]
    schema = [
        ("str", StringType(), True),
        ("expected", ArrayType(StringType(), True), True),
    ]
    df = quinn.create_df(spark, rows, schema)

    source_col = F.col("str")
    pattern_col = F.lit(r"(\d+)")
    extracted = quinn.regexp_extract_all(source_col, pattern_col)
    actual_df = df.withColumn("all_numbers", extracted)

    chispa.assert_column_equality(actual_df, "all_numbers", "expected")


def test_business_days_between():
df = quinn.create_df(
Expand Down

0 comments on commit c2e581a

Please sign in to comment.