AISecurityLab · Nicola Franco (franconicola) · May 29, 2026 · May 28, 2026 · May 29, 2026 · May 29, 2026
diff --git a/docs/docs/datasets/index.md b/docs/docs/datasets/index.md
@@ -13,6 +13,7 @@ Instead of manually specifying `goals`, use the `dataset` parameter to load goal
 - 🎯 **Presets** — 30+ ready-to-use AI safety benchmarks (AgentHarm, JailbreakBench, BeaverTails, etc.)
 - 🤗 **HuggingFace Hub** — Any public or private dataset from HuggingFace
 - 📁 **Local files** — JSON, JSONL, CSV, or TXT files from your filesystem
+- 🧭 **Intent taxonomy selection** — Pick OmniSafeBench categories/subcategories with `intents`
 
 ```mermaid
 graph LR
@@ -129,6 +130,32 @@ attack_config = {
 results = agent.hack(attack_config=attack_config)
 ```
 
+### 4. Selecting Intent Categories (OmniSafeBench)
+
+When you want category-balanced goals without manually writing prompts, use
+`intents` to select categories and subcategories directly from the
+OmniSafeBench taxonomy.
+
+```python
+attack_config = {
+    "attack_type": "h4rm3l",
+    "intents": [
+        {
+            "category": "A",
+            "subcategories": ["A1", "A2"],
+            "samples_per_subcategory": 2,
+        }
+    ],
+}
+```
+
+HackAgent maps each selection to the canonical labels used in results and the dashboard, e.g.
+`A. Ethical and Social Risks` / `A1. Bias and Discrimination`.
+
+Taxonomy source: [OmniSafeBench-MM](https://github.com/jiaxiaojunQAQ/OmniSafeBench-MM/).
+
+[See full guide: Selecting intent categories →](./selecting-intent-categories.md)
+
 ---
 
 ## Common Dataset Options
@@ -172,6 +199,7 @@ When both `shuffle` and `offset` are used, shuffling happens **first**, then off
 ## Next Steps
 
 - 📖 [**Datasets Tutorial**](../getting-started/datasets-tutorial.mdx) — Complete walkthrough with examples
+- 🧭 [**Selecting intent categories**](./selecting-intent-categories.md) — Use taxonomy categories/subcategories with strings, enums, or label codes
 - 🎯 [**Presets**](./presets.md) — All 30+ pre-configured benchmarks
 - 🤗 [**HuggingFace Provider**](./huggingface.md) — Load any HuggingFace dataset
 - 📁 [**File Provider**](./file.md) — Load from local JSON, CSV, or TXT files

diff --git a/docs/docs/datasets/selecting-intent-categories.md b/docs/docs/datasets/selecting-intent-categories.md
diff --git a/docs/docs/getting-started/datasets-tutorial.mdx b/docs/docs/getting-started/datasets-tutorial.mdx
@@ -7,6 +7,10 @@ sidebar_position: 2
 This quick-start tutorial covers only the basics you need to start using datasets in HackAgent.
 Presets are pre-configured benchmark datasets. They are the fastest way to run standardized evaluations.
 
+If you want to select goals by risk taxonomy (OmniSafeBench) instead of full datasets,
+you can use `intents` with categories/subcategories. See
+[Selecting intent categories](../datasets/selecting-intent-categories.md) for details.
+
 ### Basic CLI Example
 
 ```bash

diff --git a/docs/sidebars.ts b/docs/sidebars.ts
@@ -71,6 +71,7 @@ const sidebars: SidebarsConfig = {
         id: 'datasets/index',
       },
       items: [
+        'datasets/selecting-intent-categories',
         'datasets/presets',
         'datasets/huggingface',
         'datasets/file',

diff --git a/hackagent/attacks/orchestrator.py b/hackagent/attacks/orchestrator.py
@@ -195,19 +195,33 @@ def _prepare_attack_params(self, attack_config: Dict[str, Any]) -> Dict[str, Any
         # Check for direct goals first
         goals = attack_config.get("goals")
         dataset_config = attack_config.get("dataset")
+        intents_config = attack_config.get("intents")
+        goal_labels_by_index: Optional[Dict[int, Dict[str, str]]] = None
 
         if goals is not None and dataset_config is not None:
             logger.warning(
                 "Both 'goals' and 'dataset' provided. Using 'goals' directly."
             )
             dataset_config = None
+        if goals is not None and intents_config is not None:
+            logger.warning(
+                "Both 'goals' and 'intents' provided. Using 'goals' directly."
+            )
+            intents_config = None
+
+        if intents_config is not None and dataset_config is not None:
+            logger.warning("Both 'intents' and 'dataset' provided. Using 'intents'.")
+            dataset_config = None
 
-        if dataset_config is not None:
+        if intents_config is not None:
+            goals, goal_labels_by_index = self._load_goals_from_intents(intents_config)
+        elif dataset_config is not None:
             # Load goals from dataset source
             goals = self._load_goals_from_dataset(dataset_config)
         elif goals is None:
             raise ValueError(
-                f"'{self.attack_type}' requires either 'goals' (list) or 'dataset' (config)"
+                f"'{self.attack_type}' requires either 'goals' (list), "
+                "'dataset' (config), or 'intents' (config)"
             )
 
         if not isinstance(goals, list):
@@ -217,7 +231,10 @@ def _prepare_attack_params(self, attack_config: Dict[str, Any]) -> Dict[str, Any
             raise ValueError(f"'goals' list is empty for {self.attack_type}")
 
         logger.info(f"Prepared {len(goals)} goals for {self.attack_type} attack")
-        return {"goals": goals}
+        params: Dict[str, Any] = {"goals": goals}
+        if goal_labels_by_index:
+            params["_goal_labels_by_index"] = goal_labels_by_index
+        return params
 
     @staticmethod
     def _uses_default_category_classifier(attack_config: Dict[str, Any]) -> bool:
@@ -354,6 +371,26 @@ def _load_goals_from_dataset(self, dataset_config: Dict[str, Any]) -> list:
             logger.error(f"Failed to load goals from dataset: {e}", exc_info=True)
             raise ValueError(f"Failed to load goals from dataset: {e}") from e
 
+    def _load_goals_from_intents(
+        self, intents_config: Any
+    ) -> Tuple[List[str], Dict[int, Dict[str, str]]]:
+        """Resolve an intents config into goals and index-keyed labels."""
+        from hackagent.datasets.intents import load_goals_from_intents_config
+
+        logger.info("Loading goals from intents taxonomy config")
+
+        try:
+            goals, goal_labels_by_index = load_goals_from_intents_config(intents_config)
+            logger.info(
+                "Loaded %s goals from intents across %s labeled entries",
+                len(goals),
+                len(goal_labels_by_index),
+            )
+            return goals, goal_labels_by_index
+        except Exception as e:
+            logger.error(f"Failed to load goals from intents: {e}", exc_info=True)
+            raise ValueError(f"Failed to load goals from intents: {e}") from e
+
     def _get_attack_impl_kwargs(
         self,
         attack_config: Dict[str, Any],
@@ -664,9 +701,16 @@ def execute(
         """
         # 1. Validate parameters
         attack_params = self._prepare_attack_params(attack_config)
+        goal_labels_by_index = attack_params.pop("_goal_labels_by_index", None)
 
         # Fail-fast preflight before creating Attack/Run DB records.
-        self._validate_default_category_classifier_requirements(attack_config)
+        # Skip this when intents already provide explicit category labels.
+        if goal_labels_by_index:
+            logger.info(
+                "Using explicit intents taxonomy labels: category classifier preflight skipped"
+            )
+        else:
+            self._validate_default_category_classifier_requirements(attack_config)
 
         # Enrich run config with expected goal cardinality so downstream views
         # can keep RUNNING until all expected goals are fully tracked.
@@ -710,6 +754,13 @@ def execute(
         except Exception as e:
             logger.warning(f"Failed to update run status to RUNNING: {e}")
 
+        if goal_labels_by_index:
+            attack_config = {
+                **attack_config,
+                "_goal_labels_by_index": goal_labels_by_index,
+                "_disable_goal_category_classifier": True,
+            }
+
         # Make the event bus available to the technique impl and to the
         # tracker via the shared config bag (alongside _run_id / _backend).
         if _tui_event_bus is not None:

diff --git a/hackagent/attacks/techniques/base.py b/hackagent/attacks/techniques/base.py
@@ -243,6 +243,10 @@ def _initialize_coordinator(
             logger=self.logger,
             attack_type=attack_type,
             category_classifier_config=self.config.get("category_classifier"),
+            preclassified_goal_labels_by_index=self.config.get("_goal_labels_by_index"),
+            disable_goal_category_classifier=bool(
+                self.config.get("_disable_goal_category_classifier")
+            ),
             goals=goals,
             initial_metadata=initial_metadata,
             goal_index_start=goal_index_start,

diff --git a/hackagent/attacks/techniques/config.py b/hackagent/attacks/techniques/config.py
@@ -165,6 +165,7 @@ class GoalsDatasetConfig(BaseModel):
 
     goals: List[str] = Field(default_factory=list)
     dataset: Optional[Union[str, Dict[str, Any]]] = None
+    intents: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None
 
 
 class RunConfig(BaseModel):

diff --git a/hackagent/datasets/__init__.py b/hackagent/datasets/__init__.py
@@ -32,6 +32,11 @@
 """
 
 from hackagent.datasets.base import DatasetProvider
+from hackagent.datasets.intents import (
+    IntentCategory,
+    IntentSubcategory,
+    load_goals_from_intents_config,
+)
 from hackagent.datasets.presets import PRESETS, get_preset, list_presets
 from hackagent.datasets.registry import (
     get_provider,
@@ -42,9 +47,12 @@
 
 __all__ = [
     "DatasetProvider",
+    "IntentCategory",
+    "IntentSubcategory",
     "PRESETS",
     "get_preset",
     "get_provider",
+    "load_goals_from_intents_config",
     "list_presets",
     "load_goals",
     "load_goals_from_config",