ClickHouse · rschu1ze · Jul 6, 2025 · Jul 4, 2025 · Jul 4, 2025 · Jul 6, 2025
diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md
@@ -192,6 +192,20 @@ SELECT count() FROM hackernews WHERE hasToken(lower(comment), 'clickhouse');
 
 These functions are the most performant options to use with the `text` index.
 
+#### searchAny and searchAll {#functions-example-searchany-searchall}
+
+Functions `searchAny` and `searchAll` check whether the column value of a row matches any or all of the given search terms, respectively.
+
+Compared to `hasToken`, these functions accept multiple search terms.
+
+Example:
+
+```sql
+SELECT count() FROM hackernews WHERE searchAny(lower(comment), ['clickhouse', 'chdb']);
+
+SELECT count() FROM hackernews WHERE searchAll(lower(comment), ['clickhouse', 'chdb']);
+```
+
 ## Full-text search of the Hacker News dataset {#full-text-search-of-the-hacker-news-dataset}
 
 Let's look at the performance improvements of text indexes on a large dataset with lots of text.

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -463,6 +463,8 @@ Indexes of type `set` can be utilized by all functions. The other index types ar
 | [hasTokenOrNull](/sql-reference/functions/string-search-functions.md/#hastokenornull)                                          | ✗           | ✗      | ✗          | ✔          | ✗            | ✔    |
 | [hasTokenCaseInsensitive (`*`)](/sql-reference/functions/string-search-functions.md/#hastokencaseinsensitive)                  | ✗           | ✗      | ✗          | ✔          | ✗            | ✗    |
 | [hasTokenCaseInsensitiveOrNull (`*`)](/sql-reference/functions/string-search-functions.md/#hastokencaseinsensitiveornull)      | ✗           | ✗      | ✗          | ✔          | ✗            | ✗    |
+| [searchAny](/sql-reference/functions/string-search-functions.md/#searchany)                                                    | ✗           | ✗      | ✗          | ✗          | ✗            | ✔    |
+| [searchAll](/sql-reference/functions/string-search-functions.md/#searchall)                                                    | ✗           | ✗      | ✗          | ✗          | ✗            | ✔    |
 
 Functions with a constant argument that is less than ngram size can't be used by `ngrambf_v1` for query optimization.
 

diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
@@ -755,6 +755,124 @@ Result:
 1
 ```
 
+## searchAny {#searchany}
+
+:::note
+This function can only be used if setting [allow_experimental_full_text_index](/operations/settings/settings#allow_experimental_full_text_index) is true.
+:::
+
+Returns 1, if at least one string needle<sub>i</sub> matches the `input` column and 0 otherwise.
+
+**Syntax**
+
+```sql
+searchAny(input, ['needle1', 'needle2', ..., 'needleN'])
+```
+
+**Parameters**
+
+- `input` — The input column. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `needles` — Tokens to search for. At most 64 tokens are supported. [Array](../data-types/array.md)([String](../data-types/string.md)).
+
+:::note
+This function must be used only with a [full-text index](/engines/table-engines/mergetree-family/invertedindexes.md) column.
+The input data is tokenized by the tokenizer from the index definition.
+:::
+
+:::note
+Each string needle<sub>i</sub> is tokenized as `tokens(needle_i, [tokenizer from the index definition])`.
+This means both `['word1;word2']` and `['word1,word2']` are tokenized as `['word1','word2']` in case of the `default` tokenizer.
+Refer to [tokens](splitting-merging-functions.md#tokens) for more information about the supported separators.
+:::
+
+**Returned value**
+
+- 1, if there was at least one match.
+- 0, otherwise.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE text_table (
+    id UInt32,
+    msg String,
+    INDEX idx(msg) TYPE text(tokenizer = 'split', separators = ['()', '\\'])
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO text_table VALUES (1, '()a,\\bc()d'), (2, '()\\a()bc\\d'), (3, ',()a\\,bc,(),d,');
+
+SELECT count() FROM `text_table` WHERE searchAny(msg, ['a', 'd']);
+```
+
+Result:
+
+```response
+3
+```
+
+## searchAll {#searchall}
+
+:::note
+This function can only be used if setting [allow_experimental_full_text_index](/operations/settings/settings#allow_experimental_full_text_index) is true.
+:::
+
+Like [searchAny](#searchany), but returns 1 only if all string needles needle<sub>i</sub> match the `input` column and 0 otherwise.
+
+**Syntax**
+
+```sql
+searchAll(input, ['needle1', 'needle2', ..., 'needleN'])
+```
+
+**Parameters**
+
+- `input` — The input column. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `needles` — Tokens to search for. At most 64 tokens are supported. [Array](../data-types/array.md)([String](../data-types/string.md)).
+
+:::note
+This function must be used only with a [full-text index](/engines/table-engines/mergetree-family/invertedindexes.md) column.
+The input data is tokenized by the tokenizer from the index definition.
+:::
+
+:::note
+Each string needle<sub>i</sub> is tokenized as `tokens(needle_i, [tokenizer from the index definition])`.
+This means both `['word1;word2']` and `['word1,word2']` are tokenized as `['word1','word2']` in case of the `default` tokenizer.
+Refer to [tokens](splitting-merging-functions.md#tokens) for more information about the supported separators.
+:::
+
+**Returned value**
+
+- 1, if all needles match.
+- 0, otherwise.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE text_table (
+    id UInt32,
+    msg String,
+    INDEX idx(msg) TYPE text(tokenizer = 'split', separators = ['()', '\\']) GRANULARITY 1
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO `text_table` VALUES (1, '()a,\\bc()d'), (2, '()\\a()bc\\d'), (3, ',()a\\,bc,(),d,');
+
+SELECT count() FROM `text_table` WHERE searchAll(msg, ['a', 'd']);
+```
+
+Result:
+
+```response
+1
+```
+
 ## match {#match}
 
 Returns whether string `haystack` matches the regular expression `pattern` in [re2 regular expression syntax](https://github.com/google/re2/wiki/Syntax).

diff --git a/src/Functions/searchAnyAll.cpp b/src/Functions/searchAnyAll.cpp
@@ -228,4 +228,22 @@ template class FunctionSearchImpl<traits::SearchAllTraits>;
 
 FunctionDocumentation::IntroducedIn introduced_in = {25, 7};
 FunctionDocumentation::Category category = FunctionDocumentation::Category::StringSearch;
+
+REGISTER_FUNCTION(SearchAny)
+{
+    factory.registerFunction<FunctionSearchImpl<traits::SearchAnyTraits>>(FunctionDocumentation{
+        .description = "Searches the needle tokens in the generated tokens from the text by a given tokenizer. Returns true if any needle "
+                       "tokens exists in the text, otherwise false.",
+        .introduced_in = introduced_in,
+        .category = category});
+}
+
+REGISTER_FUNCTION(SearchAll)
+{
+    factory.registerFunction<FunctionSearchImpl<traits::SearchAllTraits>>(FunctionDocumentation{
+        .description = "Searches the needle tokens in the generated tokens from the text by a given tokenizer. Returns true if all needle "
+                       "tokens exists in the text, otherwise false.",
+        .introduced_in = introduced_in,
+        .category = category});
+}
 }
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -54,6 +54,7 @@
 #include <Interpreters/evaluateConstantExpression.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/ExpressionActions.h>
+#include <Interpreters/GinFilter.h>
 #include <Interpreters/InterpreterSelectQuery.h>
 #include <Interpreters/MergeTreeTransaction.h>
 #include <Interpreters/PartLog.h>
@@ -877,6 +878,7 @@ void MergeTreeData::checkProperties(
     if (!new_metadata.secondary_indices.empty())
     {
         std::unordered_set<String> indices_names;
+        std::unordered_set<String> columns_with_text_indexes;
 
         for (const auto & index : new_metadata.secondary_indices)
         {
@@ -891,10 +893,25 @@ void MergeTreeData::checkProperties(
 
             MergeTreeIndexFactory::instance().validate(index, attach);
 
-            if (indices_names.find(index.name) != indices_names.end())
+            if (indices_names.contains(index.name))
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "Index with name {} already exists", backQuote(index.name));
 
             indices_names.insert(index.name);
+
+            /// Workaround for https://github.com/ClickHouse/ClickHouse/issues/82385 where functions searchAll/searchAny don't work
+            /// on columns with more than one text index
+            if (index.type == TEXT_INDEX_NAME)
+            {
+                const auto & column = index.column_names[0];
+
+                if (columns_with_text_indexes.contains(column))
+                    throw Exception(
+                        ErrorCodes::BAD_ARGUMENTS,
+                        "Column {} must not have more than one text index",
+                        backQuote(index.column_names[0]));
+
+                columns_with_text_indexes.insert(column);
+            }
         }
     }
 
@@ -914,7 +931,7 @@ void MergeTreeData::checkProperties(
 
         for (const auto & projection : new_metadata.projections)
         {
-            if (projections_names.find(projection.name) != projections_names.end())
+            if (projections_names.contains(projection.name))
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection with name {} already exists", backQuote(projection.name));
 
             const auto settings = getSettings();

diff --git a/tests/queries/0_stateless/02346_text_index_creation.reference b/tests/queries/0_stateless/02346_text_index_creation.reference
@@ -13,4 +13,7 @@ Parameters are shuffled.
 Types are incorrect.
 Same argument appears >1 times.
 Must be created on single column.
+A column must not have >1 text index
+-- CREATE TABLE
+-- ALTER TABLE
 Must be created on String or FixedString or LowCardinality(String) or LowCardinality(FixedString) columns.
diff --git a/tests/queries/0_stateless/02346_text_index_creation.sql b/tests/queries/0_stateless/02346_text_index_creation.sql
@@ -267,6 +267,34 @@ CREATE TABLE tab
 )
 ENGINE = MergeTree ORDER BY key; -- { serverError INCORRECT_NUMBER_OF_COLUMNS }
 
+SELECT 'A column must not have >1 text index';
+
+SELECT '-- CREATE TABLE';
+
+CREATE TABLE tab(
+    s String,
+    INDEX idx_1(s) TYPE text(tokenizer = 'default'),
+    INDEX idx_2(s) TYPE text(tokenizer = 'ngram', ngram_size = 3)
+)
+Engine = MergeTree()
+ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
+
+SELECT '-- ALTER TABLE';
+
+CREATE TABLE tab
+(
+    str String,
+    INDEX idx_1 (str) TYPE text(tokenizer = 'default')
+)
+ENGINE = MergeTree ORDER BY tuple();
+
+ALTER TABLE tab ADD INDEX idx_2(str) TYPE text(tokenizer = 'ngram', ngram_size = 3); -- { serverError BAD_ARGUMENTS }
+
+-- It must still be possible to create another text index on the same column with a different expression
+ALTER TABLE tab ADD INDEX idx_3(lower(str)) TYPE text(tokenizer = 'ngram', ngram_size = 3);
+
+DROP TABLE tab;
+
 SELECT 'Must be created on String or FixedString or LowCardinality(String) or LowCardinality(FixedString) columns.';
 
 CREATE TABLE tab