BuildingEnergySimulationTools · BaptisteDE · Feb 20, 2026 · Feb 20, 2026
diff --git a/tide/utils.py b/tide/utils.py
@@ -6,7 +6,6 @@
 from bigtree import dict_to_tree, levelordergroup_iter
 from bigtree.node import node
 from typing import TypeVar
-import re
 
 T = TypeVar("T", bound=node.Node)
 
@@ -145,136 +144,102 @@ def get_data_col_names_from_root(data_root):
     ][-1]
 
 
-def find_cols_with_tide_tags(
-    data_columns: pd.Index | list[str], request: str
-) -> list[str]:
-    request_parts = request.split("__")
-
-    if not (1 <= len(request_parts) <= 4):
-        raise ValueError(
-            f"Request '{request}' is malformed. "
-            f"Use 'name__unit__bloc__sub_bloc' format or a "
-            f"combination of these tags."
-        )
-
-    full_tag_col_map = {
-        col_name_tag_enrichment(col, get_tags_max_level(data_columns)): col
-        for col in data_columns
-    }
-
-    def find_exact_match(search_str, target):
-        pattern = rf"(?:^|__)(?:{re.escape(search_str)})(?:$|__)"
-        match = re.search(pattern, target)
-        return match is not None
-
-    return [
-        full_tag_col_map[augmented_col]
-        for augmented_col in full_tag_col_map.keys()
-        if all(find_exact_match(part, augmented_col) for part in request_parts)
-    ]
-
-
-def find_cols_multiple_tag_groups(
-    data_columns: pd.Index | list[str], request: str
-) -> list[str]:
-    request_parts = request.split("|")
-    list_to_return = []
-    for req in request_parts:
-        list_to_return.extend(find_cols_with_tide_tags(data_columns, req))
-    return list_to_return
-
-
 def tide_request(
-    data_columns: pd.Index | list[str], request: str | pd.Index | list[str] = None
+    data_columns: pd.Index | list[str],
+    request: str | list[str] | pd.Index | None = None,
 ) -> list[str]:
     """
     Select columns by matching structured TIDE-style tags.
 
-    Filters column names based on a TIDE-style structured tag syntax. Columns are
-    expected to use a naming convention with double underscores (`__`) separating
-    tags.
+    Column names follow the format:
+
+        name__unit__bloc__sub_bloc
+
+    Tags are separated by double underscores ("__"). Not all levels are required.
+
+    Before matching, column names are automatically enriched to the maximum
+    tag depth present in `data_columns`. Missing tag levels are filled using
+    DEFAULT_TAGS, ensuring consistent hierarchical comparison.
 
-    A column name can include up to four hierarchical parts:
-    'name__unit__bloc__sub_bloc' where each part is optional, but must be separated
-    with double underscores.
+    The `request` argument defines tag queries:
 
-    The `request` argument allows searching for columns matching one or more
-    of these parts using full or partial tag patterns. Multiple tag patterns
-    can be combined using the pipe `|` character to form OR conditions.
+    - Tags are separated by "__"
+    - OR conditions are separated by "|"
+    - Multiple request entries are OR-combined
+    - Matching is exact per tag part (no substring matching)
 
     Parameters
     ----------
     data_columns : pandas.Index or list of str
-        A collection of column names to filter. Each column name should follow
-        the TIDE format (e.g., "sensor__°C__bloc1").
+        Collection of column names using TIDE-style tagging.
 
-    request : str or list of str or pandas.Index, optional
-        Tag(s) to match against the column names. Each tag string may be:
+    request : str or list[str] or pandas.Index, optional
+        Tag query expression(s). Each expression may contain:
 
-        - A full structured tag (e.g., "name__°C__bloc2")
+        - A full tag path (e.g., "name__°C__bloc2")
         - A partial tag (e.g., "°C", "bloc1")
-        - A group of tags separated by "|" (e.g., "kWh|°C")
+        - OR groups separated by "|" (e.g., "kWh|°C")
 
-        If None, all columns from `data_columns` are returned.
+        If None, all columns are returned.
 
     Returns
     -------
-    list of str
-        The list of column names that match any of the provided tag queries.
+    list[str]
+        Column names matching at least one request expression.
+        Order is preserved and duplicates are removed.
 
     Notes
     -----
-    - Matching is done per tag part, not substrings. For instance, the query
-      "bloc1" will match "name__°C__bloc1" but not "bloc11".
-    - If multiple requests are given, columns are returned if they match
-      at least one of them (logical OR).
-    - Tags can include between 1 and 4 parts, split by `__`.
+    - Matching is performed on enriched tag representations.
+    - Default tag values (e.g., "OTHER") may be injected during enrichment.
+    - Matching is exact at tag level, not substring-based.
+    - Requests may contain between 1 and 4 tag levels.
 
     Examples
     --------
-    >>> DF_COLUMNS = [
-    ...     "name_1__°C__bloc1",
-    ...     "name_1__°C__bloc2",
-    ...     "name_2",
-    ...     "name_2__DIMENSIONLESS__bloc2",
-    ...     "name_3__kWh/m²",
-    ...     "name_5__kWh",
-    ...     "name4__DIMENSIONLESS__bloc4",
-    ... ]
-
-    >>> tide_request(DF_COLUMNS)
-    ['name_1__°C__bloc1', 'name_1__°C__bloc2', 'name_2',
-     'name_2__DIMENSIONLESS__bloc2', 'name_3__kWh/m²',
-     'name_5__kWh', 'name4__DIMENSIONLESS__bloc4']
-
     >>> tide_request(DF_COLUMNS, "°C")
-    ['name_1__°C__bloc1', 'name_1__°C__bloc2']
-
     >>> tide_request(DF_COLUMNS, "kWh|°C")
-    ['name_5__kWh', 'name_1__°C__bloc1', 'name_1__°C__bloc2']
-
-    >>> # Columns are not selected twice
     >>> tide_request(DF_COLUMNS, ["kWh|°C", "name_5__kWh"])
-    ['name_5__kWh', 'name_1__°C__bloc1', 'name_1__°C__bloc2']
     """
 
     if request is None:
         return list(data_columns)
 
-    elif isinstance(request, str):
+    if isinstance(request, str):
         request = [request]
 
-    if not (isinstance(request, pd.Index) or isinstance(request, list)):
+    if not isinstance(request, (list, pd.Index)):
         raise ValueError(
-            "Invalid request. Was expected an instance of str, pd.Index or List[str]"
-            f"got {type(request)} instead"
+            f"request must be str, list[str], pd.Index or None, got {type(request)}"
         )
 
-    list_to_return = []
+    max_level = get_tags_max_level(data_columns)
+
+    # Enrich columns once
+    enriched_map = {
+        col_name_tag_enrichment(col, max_level): col for col in data_columns
+    }
+
+    selected = []
+
     for req in request:
-        list_to_return.extend(find_cols_multiple_tag_groups(data_columns, req))
+        for group in req.split("|"):
+            group_tags = group.split("__")
+
+            if not (1 <= len(group_tags) <= 4):
+                raise ValueError(
+                    f"Request '{group}' is malformed. "
+                    "Use up to 4 tags separated by '__'."
+                )
+
+            for enriched_name, original in enriched_map.items():
+                tags = enriched_name.split("__")
+
+                # Exact per-tag match
+                if all(tag in tags for tag in group_tags):
+                    selected.append(original)
 
-    return list(dict.fromkeys(list_to_return))
+    return list(dict.fromkeys(selected))
 
 
 def data_columns_to_tree(columns: pd.Index | list[str]) -> T: