Merged

Commits (23)
- `4c5a9ea` Enhance JSON Schema parsing with support for format validation, enum … (Agent-Hellboy, Sep 2, 2025)
- `6ba5aad` Add JSON Schema parser and property-based invariants system (Agent-Hellboy, Sep 2, 2025)
- `1b9d98f` Reorganize tests into component-based structure (Agent-Hellboy, Sep 2, 2025)
- `60e2b84` resolve comments (Agent-Hellboy, Sep 2, 2025)
- `ca63d4c` resolve comments (Agent-Hellboy, Sep 2, 2025)
- `29acdd2` resolve comments (Agent-Hellboy, Sep 2, 2025)
- `1a51639` resolve comments (Agent-Hellboy, Sep 2, 2025)
- `195542e` Merge pull request #75 from Agent-Hellboy/schema_parser_for_fuzzer (Agent-Hellboy, Sep 2, 2025)
- `021501b` resolve comments (Agent-Hellboy, Sep 3, 2025)
- `7852052` Reorganize tests into component-based structure (Agent-Hellboy, Sep 2, 2025)
- `4bcc324` resolve comments (Agent-Hellboy, Sep 3, 2025)
- `c11b082` Merge remote changes and convert test_realistic_strategies.py to pyte… (Agent-Hellboy, Sep 3, 2025)
- `26c2bf9` resolve comments (Agent-Hellboy, Sep 4, 2025)
- `6dec921` resolve comments (Agent-Hellboy, Sep 4, 2025)
- `f7b4ff2` resolve comments (Agent-Hellboy, Sep 4, 2025)
- `7059ec1` Add manual run support for full test run (Agent-Hellboy, Sep 4, 2025)
- `3d451fa` resolve comments (Agent-Hellboy, Sep 4, 2025)
- `d17897a` ci(tests): re-enable PR/push triggers, add permissions and concurrency (Agent-Hellboy, Sep 4, 2025)
- `b4ad42a` resolve comments (Agent-Hellboy, Sep 4, 2025)
- `ee07f66` resolve comments (Agent-Hellboy, Sep 4, 2025)
- `49a4161` ci(tests): gate Codecov upload by token; skip forks; require coverage… (Agent-Hellboy, Sep 4, 2025)
- `7904614` fix: resolve all remaining PR #76 comments - component tests workflow… (Agent-Hellboy, Sep 4, 2025)
- `027ce10` fix: resolve critical PR #76 comments - executor deadlock, workflow a… (Agent-Hellboy, Sep 4, 2025)
130 changes: 130 additions & 0 deletions .github/workflows/component-tests.yml
@@ -0,0 +1,130 @@
name: Component Tests

on:
  workflow_dispatch:
    inputs:
      components:
        description: "Comma-separated components to run (auth,cli,client,fuzz_engine,safety[safety_system],transport)"
        required: false
        default: ""

permissions:
  contents: read
  id-token: write
  actions: read

jobs:
  component-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .
          pip install pytest pytest-cov pytest-asyncio

      - name: Determine changed components
        id: changes
        run: |
          # If manual input is provided, use that. Otherwise, run all components.
          INPUTS="${{ github.event.inputs.components }}"
          if [ -n "$INPUTS" ]; then
            AUTH_CHANGES=false
            CLI_CHANGES=false
            CLIENT_CHANGES=false
            FUZZ_ENGINE_CHANGES=false
            SAFETY_CHANGES=false
            TRANSPORT_CHANGES=false
            IFS=',' read -ra TOKENS <<< "$INPUTS"
            for t in "${TOKENS[@]}"; do
              t="${t//[[:space:]]/}"
              case "$t" in
                auth) AUTH_CHANGES=true ;;
                cli) CLI_CHANGES=true ;;
                client) CLIENT_CHANGES=true ;;
                fuzz_engine) FUZZ_ENGINE_CHANGES=true ;;
                safety|safety_system) SAFETY_CHANGES=true ;;
                transport) TRANSPORT_CHANGES=true ;;
              esac
            done
          else
            # Default to running all components on manual trigger
            AUTH_CHANGES=true
            CLI_CHANGES=true
            CLIENT_CHANGES=true
            FUZZ_ENGINE_CHANGES=true
            SAFETY_CHANGES=true
            TRANSPORT_CHANGES=true
          fi

          echo "auth=$AUTH_CHANGES" >> $GITHUB_OUTPUT
          echo "cli=$CLI_CHANGES" >> $GITHUB_OUTPUT
          echo "client=$CLIENT_CHANGES" >> $GITHUB_OUTPUT
          echo "fuzz_engine=$FUZZ_ENGINE_CHANGES" >> $GITHUB_OUTPUT
          echo "safety=$SAFETY_CHANGES" >> $GITHUB_OUTPUT
          echo "transport=$TRANSPORT_CHANGES" >> $GITHUB_OUTPUT

      - name: Run auth tests
        if: steps.changes.outputs.auth == 'true'
        run: pytest -vv tests/unit/auth --cov=mcp_fuzzer.auth --cov-report=xml:coverage.auth.xml

      - name: Run CLI tests
        if: steps.changes.outputs.cli == 'true'
        run: pytest -vv tests/unit/cli --cov=mcp_fuzzer.cli --cov-report=xml:coverage.cli.xml

      - name: Run client tests
        if: steps.changes.outputs.client == 'true'
        run: pytest -vv tests/unit/client --cov=mcp_fuzzer.client --cov-report=xml:coverage.client.xml

      - name: Run fuzz engine tests
        if: steps.changes.outputs.fuzz_engine == 'true'
        run: pytest -vv tests/unit/fuzz_engine --cov=mcp_fuzzer.fuzz_engine --cov-report=xml:coverage.fuzz_engine.xml

      - name: Run safety system tests
        if: steps.changes.outputs.safety == 'true'
        run: pytest -vv tests/unit/safety_system --cov=mcp_fuzzer.safety_system --cov-report=xml:coverage.safety_system.xml

      - name: Run transport tests
        if: steps.changes.outputs.transport == 'true'
        run: pytest -vv tests/unit/transport --cov=mcp_fuzzer.transport --cov-report=xml:coverage.transport.xml

      - name: Run integration tests
        if: ${{ steps.changes.outputs.auth == 'true' || steps.changes.outputs.cli == 'true' || steps.changes.outputs.client == 'true' || steps.changes.outputs.fuzz_engine == 'true' || steps.changes.outputs.safety == 'true' || steps.changes.outputs.transport == 'true' }}
        run: |
          pytest -vv tests/integration --cov=mcp_fuzzer --cov-report=xml:coverage.integration.xml

      - name: Check for coverage files
        id: coverage_check
        run: |
          if ls coverage.*.xml 1> /dev/null 2>&1; then
            echo "has_coverage=true" >> $GITHUB_OUTPUT
          else
            echo "has_coverage=false" >> $GITHUB_OUTPUT
          fi

      - name: Check Codecov token
        id: codecov_token
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        run: |
          if [ -n "$CODECOV_TOKEN" ]; then
            echo "has_token=true" >> $GITHUB_OUTPUT
          else
            echo "has_token=false" >> $GITHUB_OUTPUT
          fi

      - name: Upload coverage to Codecov
        if: ${{ (steps.changes.outputs.auth == 'true' || steps.changes.outputs.cli == 'true' || steps.changes.outputs.client == 'true' || steps.changes.outputs.fuzz_engine == 'true' || steps.changes.outputs.safety == 'true' || steps.changes.outputs.transport == 'true') && steps.coverage_check.outputs.has_coverage == 'true' && steps.codecov_token.outputs.has_token == 'true' }}
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: coverage.*.xml
          fail_ci_if_error: true
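For a local sanity check of the "Determine changed components" step above, the same token parsing can be exercised in a plain bash shell. The `INPUTS` value below is an example; the variable names mirror the workflow (only two components are shown for brevity):

```shell
#!/usr/bin/env bash
# Mirror of the workflow's input parsing: split a comma-separated component
# list, strip whitespace from each token, and flip per-component flags.
INPUTS="auth, transport"

AUTH_CHANGES=false
TRANSPORT_CHANGES=false

IFS=',' read -ra TOKENS <<< "$INPUTS"
for t in "${TOKENS[@]}"; do
  t="${t//[[:space:]]/}"   # remove all whitespace inside the token
  case "$t" in
    auth) AUTH_CHANGES=true ;;
    transport) TRANSPORT_CHANGES=true ;;
  esac
done

echo "auth=$AUTH_CHANGES transport=$TRANSPORT_CHANGES"
```

Note the `<<<` here-string and `${t//…}` substitution are bashisms, matching the workflow's default bash shell on ubuntu-latest.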
38 changes: 35 additions & 3 deletions .github/workflows/tests.yml
@@ -2,28 +2,60 @@ name: Tests

 on:
   push:
-    branches: [main, master]
+    branches: [ main ]
   pull_request:
-    branches: [main, master]
+    branches: [ main ]
+  workflow_dispatch:
+    inputs:
+      reason:
+        description: "Why are you running the test workflow?"
+        required: false
+        default: "manual run"
+      pytest_args:
+        description: "Optional pytest args (e.g., -m 'unit and fuzz_engine')"
+        required: false
+        default: ""
+
+permissions:
+  contents: read
+  id-token: write
+
+concurrency:
+  group: tests-${{ github.workflow }}-${{ github.head_ref || github.ref }}
+  cancel-in-progress: true

 jobs:
   tests:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: '3.11'
           cache: 'pip'
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install .
           pip install pytest pytest-cov pytest-asyncio
       - name: Run tests with coverage
         run: |
-          pytest -vv
+          pytest -vv --cov=mcp_fuzzer --cov-report=xml ${{ github.event.inputs.pytest_args }}
+      - name: Check Codecov token
+        id: codecov_token
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+        run: |
+          if [ -n "$CODECOV_TOKEN" ]; then
+            echo "has_token=true" >> $GITHUB_OUTPUT
+          else
+            echo "has_token=false" >> $GITHUB_OUTPUT
+          fi
       - name: Upload coverage to Codecov
+        if: ${{ steps.codecov_token.outputs.has_token == 'true' && hashFiles('coverage.xml') != '' && (github.event_name != 'pull_request' || github.event.pull_request.head.repo.fork == false) }}
         uses: codecov/codecov-action@v4
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
36 changes: 34 additions & 2 deletions docs/architecture.md
@@ -244,6 +244,7 @@ The fuzzing engine orchestrates the testing process and manages test execution.

- `tool_fuzzer.py`: Tests individual tools with various argument combinations
- `protocol_fuzzer.py`: Tests MCP protocol types with various message structures
- `invariants.py`: Implements property-based invariants and checks for fuzz testing
- `executor.py`: Provides asynchronous execution framework with concurrency control and retry mechanisms

**Fuzzing Process:**
@@ -252,7 +253,8 @@ The fuzzing engine orchestrates the testing process and manages test execution.
2. **Strategy Selection**: Choose appropriate fuzzing strategy (realistic vs aggressive)
3. **Data Generation**: Generate test data using Hypothesis and custom strategies
4. **Execution**: Execute tests with controlled concurrency via AsyncFuzzExecutor
-5. **Analysis**: Analyze results and generate reports
+5. **Invariant Verification**: Verify responses against property-based invariants
+6. **Analysis**: Analyze results and generate reports

### 4. Strategy System

@@ -263,13 +265,43 @@ The strategy system generates test data using different approaches.
- `realistic/`: Generates valid, realistic data for functionality testing
- `aggressive/`: Generates malicious/malformed data for security testing
- `strategy_manager.py`: Orchestrates strategy selection and execution
- `schema_parser.py`: Parses JSON Schema definitions to generate appropriate test data

**Strategy Types:**

- **Realistic Strategies**: Generate valid Base64, UUIDs, timestamps, semantic versions
- **Aggressive Strategies**: Generate SQL injection, XSS, path traversal, buffer overflow attempts

-### 5. Safety System
+**Schema Parser:**

The schema parser provides comprehensive support for parsing JSON Schema definitions and generating appropriate test data based on schema specifications. It handles:

- Basic types: string, number, integer, boolean, array, object, null
- String constraints: minLength, maxLength, pattern, format
- Number/Integer constraints: minimum, maximum, exclusiveMinimum, exclusiveMaximum, multipleOf
- Array constraints: minItems, maxItems, uniqueItems
- Object constraints: required properties, minProperties, maxProperties
- Schema combinations: oneOf, anyOf, allOf
- Enums and constants

The module supports both "realistic" and "aggressive" fuzzing strategies: realistic mode generates valid data conforming to the schema, while aggressive mode intentionally generates edge cases and invalid data to exercise error handling.

### 5. Invariants System

The invariants system provides property-based testing capabilities to verify response validity, error type correctness, and prevention of unintended crashes or unexpected states during fuzzing.

**Key Components:**

- `check_response_validity`: Ensures responses follow JSON-RPC 2.0 specification
- `check_error_type_correctness`: Verifies error responses have correct structure and codes
- `check_response_schema_conformity`: Validates responses against JSON schema definitions
- `verify_response_invariants`: Orchestrates multiple invariant checks on a single response
- `verify_batch_responses`: Applies invariant checks to batches of responses
- `check_state_consistency`: Ensures server state remains consistent during fuzzing

These invariants serve as runtime assertions that validate the behavior of the server being tested, helping to identify potential issues that might not be caught by simple error checking.
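As an illustration only, the response-validity invariant reduces to a few structural assertions on the JSON-RPC 2.0 envelope. The helper below is a simplified sketch, not the project's actual `check_response_validity`:

```python
class InvariantViolation(Exception):
    """Raised when a response breaks a property-based invariant (sketch)."""

def check_response_validity(response):
    """Simplified sketch of a JSON-RPC 2.0 response invariant.

    Not the project's real implementation; it only captures the core rules:
    the response is an object with jsonrpc "2.0", an id, and exactly one of
    result/error.
    """
    if not isinstance(response, dict):
        raise InvariantViolation("response must be a JSON object")
    if response.get("jsonrpc") != "2.0":
        raise InvariantViolation("jsonrpc member must be exactly '2.0'")
    if "id" not in response:
        raise InvariantViolation("response must echo a request id")
    if ("result" in response) == ("error" in response):
        raise InvariantViolation("exactly one of result/error must be present")
    return True

# A well-formed success response passes; result and error together would not.
assert check_response_validity({"jsonrpc": "2.0", "id": 1, "result": "ok"})
```

Running such checks against every fuzzed response turns "the server answered something" into "the server answered something well-formed".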

### 6. Safety System

The safety system provides multiple layers of protection against dangerous operations.

70 changes: 70 additions & 0 deletions docs/reference.md
@@ -113,13 +113,15 @@ mcp_fuzzer/
    protocol_fuzzer.py     # Orchestrates protocol-type fuzzing
    tool_fuzzer.py         # Orchestrates tool fuzzing
    strategy/
      schema_parser.py     # JSON Schema parser for test data generation
      strategy_manager.py  # Selects strategies per phase/type
      realistic/
        tool_strategy.py
        protocol_type_strategy.py
      aggressive/
        tool_strategy.py
        protocol_type_strategy.py
    invariants.py          # Property-based invariants and checks
    runtime/
      manager.py           # Async ProcessManager (start/stop, signals)
      watchdog.py          # ProcessWatchdog (hang detection)
@@ -143,6 +145,74 @@ mcp_fuzzer/
  client.py                # UnifiedMCPFuzzerClient orchestrator
```

## Schema Parser

The schema parser module (`mcp_fuzzer.fuzz_engine.strategy.schema_parser`) provides comprehensive support for parsing JSON Schema definitions and generating appropriate test data based on schema specifications.

### Features

- **Basic Types**: Handles string, number, integer, boolean, array, object, and null types
- **String Constraints**: Supports minLength, maxLength, pattern, and format validations
- **Number/Integer Constraints**: Handles minimum, maximum, exclusiveMinimum, exclusiveMaximum, multipleOf
- **Array Constraints**: Supports minItems, maxItems, uniqueItems
- **Object Constraints**: Handles required properties, minProperties, and additionalProperties (`additionalProperties: false` forbids extra properties)
- **Schema Combinations**: Processes oneOf, anyOf, allOf schema combinations with proper constraint merging
- **Enums and Constants**: Supports enum values and const keyword (both in realistic and aggressive modes)
- **Fuzzing Phases**: Supports both "realistic" (valid) and "aggressive" (edge cases) modes

### Example Usage

```python
from mcp_fuzzer.fuzz_engine.strategy.schema_parser import make_fuzz_strategy_from_jsonschema

# Define a JSON schema
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "minLength": 3, "maxLength": 50},
        "age": {"type": "integer", "minimum": 18, "maximum": 120},
        "email": {"type": "string", "format": "email"}
    },
    "required": ["name", "age"]
}

# Generate realistic data
realistic_data = make_fuzz_strategy_from_jsonschema(schema, phase="realistic")

# Generate aggressive data for security testing
aggressive_data = make_fuzz_strategy_from_jsonschema(schema, phase="aggressive")
```

## Invariants System

The invariants module (`mcp_fuzzer.fuzz_engine.invariants`) provides property-based testing capabilities to verify response validity, error type correctness, and prevention of unintended crashes or unexpected states during fuzzing.

### Features

- **Response Validity**: Ensures responses follow JSON-RPC 2.0 specification
- **Error Type Correctness**: Verifies error responses have correct structure and codes
- **Schema Conformity**: Validates responses against JSON schema definitions
- **Batch Verification**: Applies invariant checks to batches of responses
- **State Consistency**: Ensures server state remains consistent during fuzzing

### Example Usage

```python
from mcp_fuzzer.fuzz_engine.invariants import verify_response_invariants, InvariantViolation

# Verify a response against invariants
try:
    verify_response_invariants(
        response={"jsonrpc": "2.0", "id": 1, "result": "success"},
        expected_error_codes=[400, 404, 500],
        schema={"type": "object", "properties": {"result": {"type": "string"}}}
    )
    # Response is valid
except InvariantViolation as e:
    # Invariant violation detected
    print(f"Violation: {e}")
```

- Strategy: Generates inputs for tools and protocol types in two phases:
  - realistic (valid/spec-conformant), aggressive (malformed/attack vectors).
- Fuzzer: Runs strategies, sends envelopes via a transport, and records results.
14 changes: 11 additions & 3 deletions mcp_fuzzer/fuzz_engine/executor.py
@@ -169,9 +169,8 @@ async def execute_batch(
"""

         async def _bounded_execute_and_track(op, args, kwargs):
-            # Acquire semaphore before execution and release after
-            async with self._semaphore:
-                return await self._execute_and_track(op, args, kwargs)
+            # Concurrency is enforced inside execute(); avoid double-acquire deadlock
+            return await self._execute_and_track(op, args, kwargs)

         # Create bounded tasks that respect the semaphore limit
         tasks = []
@@ -242,3 +241,12 @@ async def shutdown(self, timeout: float = 5.0) -> None:
                     "Shutdown timed out with %d tasks still running",
                     len(self._running_tasks),
                 )
+            # Proactively cancel outstanding tasks and wait for them to finish
+            for task in list(self._running_tasks):
+                task.cancel()
+            await asyncio.gather(*self._running_tasks, return_exceptions=True)
+        finally:
Comment on lines +244 to +248
🛠️ Refactor suggestion

**Cancel-then-await can hang indefinitely; bound the second wait.**

If tasks ignore cancellation or are stuck in uninterruptible awaits, the unconditional `gather()` can block shutdown forever. Apply this diff to add a bounded "grace" wait and log if tasks remain:

             for task in list(self._running_tasks):
                 task.cancel()
-            await asyncio.gather(*self._running_tasks, return_exceptions=True)
+            try:
+                await asyncio.wait_for(
+                    asyncio.gather(*self._running_tasks, return_exceptions=True),
+                    timeout=min(2.0, timeout),
+                )
+            except asyncio.TimeoutError:
+                self._logger.error(
+                    "Forced shutdown still timed out; %d tasks may still be running",
+                    sum(1 for t in self._running_tasks if not t.done()),
+                )
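The hang the suggestion guards against is easy to reproduce in isolation. The snippet below is a self-contained illustration (a hypothetical stubborn task, not project code): a task that swallows its first cancellation would block a bare `gather()` indefinitely, while `asyncio.wait_for` gives up after the grace period:

```python
import asyncio

async def stubborn():
    # Hypothetical worst case: the task swallows its first cancellation.
    try:
        await asyncio.sleep(60)
    except asyncio.CancelledError:
        await asyncio.sleep(60)  # keeps running despite cancel()

async def shutdown_demo():
    task = asyncio.create_task(stubborn())
    await asyncio.sleep(0)  # let the task start
    task.cancel()
    try:
        # Bounded grace period instead of an unconditional gather()
        await asyncio.wait_for(
            asyncio.gather(task, return_exceptions=True), timeout=0.1
        )
        return False  # everything wound down in time
    except asyncio.TimeoutError:
        return True   # gave up after the grace period instead of hanging

print(asyncio.run(shutdown_demo()))  # True
```

`wait_for` also cancels the inner `gather` on timeout, so the stubborn task receives a second cancellation rather than being leaked silently.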

+            # Ensure the set is cleaned up
+            self._running_tasks = {
+                t for t in self._running_tasks if not t.cancelled() and not t.done()
+            }
Comment on lines +249 to +252
⚠️ Potential issue

**Rebinding `_running_tasks` breaks done callbacks; mutate in place instead.**

`add_done_callback(self._running_tasks.discard)` captures the old set object. Reassigning `self._running_tasks` creates a new set, so callbacks remove from the stale set, leaking entries in the current set. Apply this diff to preserve the set identity and safely remove completed/cancelled tasks:

-            # Ensure the set is cleaned up
-            self._running_tasks = {
-                t for t in self._running_tasks if not t.cancelled() and not t.done()
-            }
+            # Ensure the set is cleaned up without rebinding (callbacks rely on identity)
+            to_remove = {t for t in tuple(self._running_tasks) if t.done() or t.cancelled()}
+            self._running_tasks.difference_update(to_remove)
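The identity pitfall flagged above can be demonstrated with plain sets, no asyncio required (the names below are illustrative, not the executor's):

```python
running = {"task-a", "task-b"}
discard = running.discard  # stands in for add_done_callback(running.discard)

# Rebinding creates a brand-new set; the saved callback still targets the old one.
running = {"task-a", "task-b"}
discard("task-a")          # removes from the STALE set, not the live one

print("task-a" in running)  # True
```

`difference_update` (or looping with `discard`) mutates the original set object, so every previously registered callback keeps pointing at live state.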
