# Test Unified Security Requirements Crew

This notebook allows you to test and debug the unified crew architecture step-by-step.

**Setup:**
1. Make sure your `.env` file has OPENAI_API_KEY
2. Ensure Weaviate is running (for map_security_controls task)
3. Run cells in order (or, after running the setup and helper cells, jump to a specific task)

**Features:**
- Test individual tasks or full crew
- Cache intermediate results
- Debug specific tasks (especially map_security_controls)
- Resume from any point


In [1]:
# Setup and imports
import json
import sys
import os
from pathlib import Path
from dotenv import load_dotenv

# Put <cwd>/src at the front of the import path so the project package
# can be imported from the notebook.
src_dir = Path.cwd() / "src"
sys.path.insert(0, str(src_dir))

# Load environment variables (the notebook expects them in a .env file)
load_dotenv()

# Report whether the OpenAI key is visible to this process
if os.getenv("OPENAI_API_KEY"):
    print("‚úì OpenAI API key loaded")
else:
    print("‚ö†Ô∏è  WARNING: OPENAI_API_KEY not found in environment!")
    print("   Please add it to your .env file")

print("‚úì Setup complete")

‚úì OpenAI API key loaded
‚úì Setup complete


In [2]:
# Weaviate Configuration and Health Check
import weaviate

# Set Weaviate connection parameters. setdefault() keeps any value that is
# already present in the environment (e.g. loaded from .env).
os.environ.setdefault("WEAVIATE_HOST", "localhost")
os.environ.setdefault("WEAVIATE_PORT", "8080")
os.environ.setdefault("WEAVIATE_GRPC_PORT", "50051")

print("Checking Weaviate connection...")
try:
    client = weaviate.connect_to_local(
        host=os.getenv("WEAVIATE_HOST"),
        port=int(os.getenv("WEAVIATE_PORT")),
        grpc_port=int(os.getenv("WEAVIATE_GRPC_PORT")),
    )

    # Release the connection no matter how the checks below end. Previously an
    # exception raised by any check skipped client.close() and leaked the
    # connection for the lifetime of the kernel.
    try:
        # Check if connected
        if client.is_ready():
            print("‚úì Weaviate is connected and ready")

            # Check if SecurityControl collection exists
            if client.collections.exists("SecurityControl"):
                collection = client.collections.get("SecurityControl")
                print("‚úì SecurityControl collection exists")

                # Try a simple query to verify it works; a failure here is
                # reported but does not abort the rest of the health check.
                try:
                    response = collection.query.near_text(query="authentication", limit=1)
                    print(f"‚úì Test query successful (found {len(response.objects)} results)")
                except Exception as e:
                    print(f"‚ö†Ô∏è  Test query failed: {e}")
            else:
                print("‚ö†Ô∏è  WARNING: SecurityControl collection does not exist!")
                print("   Run: python -m security_requirements_system.tools.weaviate_setup")
        else:
            print("‚ö†Ô∏è  WARNING: Weaviate is not ready")
    finally:
        client.close()

except Exception as e:
    # Reached for connection failures and for any error raised by the checks.
    print(f"‚ùå ERROR: Cannot connect to Weaviate: {e}")
    print(f"   Make sure Weaviate is running on {os.getenv('WEAVIATE_HOST')}:{os.getenv('WEAVIATE_PORT')}")
    print("   Start with: docker-compose up -d")
    print("\nWeaviate Configuration:")
    print(f"  Host: {os.getenv('WEAVIATE_HOST')}")
    print(f"  Port: {os.getenv('WEAVIATE_PORT')}")
    print(f"  gRPC Port: {os.getenv('WEAVIATE_GRPC_PORT')}")

Checking Weaviate connection...
‚úì Weaviate is connected and ready
‚úì SecurityControl collection exists
‚úì Test query successful (found 1 results)


In [3]:
# Helper functions

def find_project_root(start, markers=("test_outputs", "pyproject.toml", "src"), max_depth=5):
    """Return the nearest directory at or above ``start`` containing a marker.

    Args:
        start: Directory where the search begins.
        markers: File or directory names that identify the project root.
        max_depth: Number of directories examined, counting ``start`` itself.

    Returns:
        The first directory (walking upwards) in which any marker exists, or
        ``None`` when none of the examined directories contains one or the
        filesystem root is reached first.
    """
    candidate = Path(start)
    for _ in range(max_depth):
        if any((candidate / marker).exists() for marker in markers):
            return candidate
        if candidate.parent == candidate:
            # Reached the filesystem root without finding a marker.
            break
        candidate = candidate.parent
    return None


# Determine project root directory
_current_dir = Path.cwd()
max_depth = 5
PROJECT_ROOT = find_project_root(_current_dir, max_depth=max_depth)

if PROJECT_ROOT is None:
    # No marker found. Previously PROJECT_ROOT was left pointing at an
    # unrelated ancestor (or the filesystem root) and test_outputs/ was created
    # there. Keep the "tests" heuristic, otherwise stay in the working directory.
    if "tests" in str(_current_dir):
        PROJECT_ROOT = _current_dir.parent
    else:
        PROJECT_ROOT = _current_dir

TEST_OUTPUTS_DIR = PROJECT_ROOT / "test_outputs" / "unified_crew"
TEST_OUTPUTS_DIR.mkdir(exist_ok=True, parents=True)


def save_task_output(task_name, result, output_dir=None):
    """Write a task result to ``<task_name>_output.json`` and return the path.

    The JSON document contains the task name, the result's ``raw`` attribute
    (or ``str(result)`` when the attribute is missing) and, when the result has
    a truthy ``pydantic`` attribute, its ``model_dump()`` under ``"pydantic"``.
    The target directory defaults to ``TEST_OUTPUTS_DIR`` and is created when
    it does not exist yet.
    """
    target_dir = TEST_OUTPUTS_DIR if output_dir is None else Path(output_dir)
    target_dir.mkdir(exist_ok=True, parents=True)
    destination = target_dir / f"{task_name}_output.json"

    if hasattr(result, "raw"):
        raw_text = result.raw
    else:
        raw_text = str(result)
    payload = {"task_name": task_name, "raw": raw_text}

    structured = getattr(result, "pydantic", None)
    if structured:
        payload["pydantic"] = structured.model_dump()

    with destination.open("w") as handle:
        # default=str stringifies values json cannot encode natively
        json.dump(payload, handle, indent=2, default=str)

    print(f"\nüíæ Saved to: {destination}")
    return destination


def load_task_output(task_name, output_dir=None):
    """Return the parsed ``<task_name>_output.json`` cache, or ``None``.

    The file is looked up in ``output_dir`` (``TEST_OUTPUTS_DIR`` when not
    given). ``None`` is returned when the file does not exist; otherwise the
    decoded JSON content is returned and the source path is printed.
    """
    source_dir = Path(output_dir) if output_dir is not None else TEST_OUTPUTS_DIR
    cache_file = source_dir / f"{task_name}_output.json"

    # Nothing cached for this task
    if not cache_file.exists():
        return None

    with cache_file.open("r") as handle:
        payload = json.load(handle)
    print(f"üìÇ Loaded cached output from: {cache_file}")
    return payload


def display_task_output(result, task_name):
    """Print a banner followed by a truncated preview of a task result.

    When ``result`` has a ``raw`` attribute its first 500 characters are shown
    (with ``...`` appended if it is longer). When it has a truthy ``pydantic``
    attribute, the first 1000 characters of its JSON-formatted
    ``model_dump()`` are shown as well.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"{task_name} OUTPUT")
    print(f"{banner}\n")

    if hasattr(result, "raw"):
        raw_text = result.raw
        print("RAW OUTPUT (first 500 chars):")
        if len(raw_text) > 500:
            print(raw_text[:500] + "...")
        else:
            print(raw_text)

    structured = getattr(result, "pydantic", None)
    if structured:
        print("\nSTRUCTURED OUTPUT:")
        rendered = json.dumps(structured.model_dump(), indent=2, default=str)
        print(rendered[:1000])


# Confirm the helpers are defined and show which directories this session
# resolved, so a wrong project root is noticed before any task output is saved.
print("‚úì Helper functions loaded")
print(f"  Project root: {PROJECT_ROOT}")
print(f"  Test outputs dir: {TEST_OUTPUTS_DIR}")
print(f"  Test outputs exists: {TEST_OUTPUTS_DIR.exists()}")

‚úì Helper functions loaded
  Project root: /Users/savvas/Library/CloudStorage/OneDrive-BTHStudent/4 THESIS/thesis-code/security_requirements_system
  Test outputs dir: /Users/savvas/Library/CloudStorage/OneDrive-BTHStudent/4 THESIS/thesis-code/security_requirements_system/test_outputs/unified_crew
  Test outputs exists: True


In [4]:
# Sample requirements text
# Free-form requirements for a fictional task management system. The later
# cells pass this string to crew.kickoff() as the "requirements_text" input.

SAMPLE_REQUIREMENTS = """
Task Management System Requirements:

1. User Management
   - User registration and login
   - Multi-factor authentication
   - Password reset functionality
   - Profile management

2. Task Management
   - Create, edit, and delete tasks
   - Assign tasks to users
   - Set task priorities and deadlines
   - Task status tracking (todo, in-progress, done)

3. Project Management
   - Create and manage projects
   - Assign team members to projects
   - Project-level permissions

4. Reporting
   - Task completion reports
   - User activity reports
   - Project progress dashboards

5. Admin Panel
   - User management
   - System configuration
   - Audit logs
"""

# Quick sanity check that the text was defined (prints its length)
print("‚úì Sample requirements loaded")
print(f"Length: {len(SAMPLE_REQUIREMENTS)} characters")

‚úì Sample requirements loaded
Length: 661 characters


---

## Test Individual Tasks

Test each task individually to debug issues. Tasks are executed sequentially, so each task can access outputs from previous tasks via context.

### Task 5: Map Security Controls (DEBUG FOCUS)

**This is where the code froze!** Let's test this task specifically with detailed debugging.


In [None]:
from security_requirements_system.crew import SecurityRequirementsCrew
from security_requirements_system.data_models import DomainSecurityOutput, AnalysisOutput
from security_requirements_system.tools.weaviate_tool import WeaviateQueryTool

print("Testing Task 5: Map Security Controls (DEBUG MODE)...\n")

# Holds the result of the full crew run triggered by the fallback below, if
# that fallback fires. It is reused further down so the (slow, paid) crew is
# not kicked off a second time within the same cell execution.
full_run_result = None

# Load previous outputs if not in memory
if "analysis_output" not in globals():
    cached = load_task_output("analyze_requirements")
    if cached and "pydantic" in cached:
        analysis_output = AnalysisOutput(**cached["pydantic"])
    else:
        print("‚ö†Ô∏è  Loading analysis_output from full crew run...")
        # Run the crew to get analysis_output (first task's structured output)
        crew_instance = SecurityRequirementsCrew()
        crew = crew_instance.crew()
        full_run_result = crew.kickoff(inputs={"requirements_text": SAMPLE_REQUIREMENTS})
        if full_run_result.tasks_output:
            analysis_output = full_run_result.tasks_output[0].pydantic

# First, test the WeaviateQueryTool directly (bypasses the agent entirely)
print("\n1. Testing WeaviateQueryTool directly...")
try:
    tool = WeaviateQueryTool()
    test_result = tool._run(query="authentication", limit=3)
    print(f"‚úì Tool test successful")
    print(f"   Result preview: {test_result[:200]}...")
except Exception as e:
    print(f"‚ùå Tool test failed: {e}")
    import traceback

    traceback.print_exc()

# Check for cached output
cached = load_task_output("map_security_controls")
if cached:
    print("\n‚ö†Ô∏è  Found cached output. Delete the cache file to re-run.")
    if "pydantic" in cached:
        domain_output = DomainSecurityOutput(**cached["pydantic"])
        print(f"‚úì Loaded cached output")
else:
    print("\n2. Running map_security_controls task...")
    print("   This may take a while as it queries Weaviate for each requirement...")

    # Only build a new crew when the fallback above did not already run one.
    if full_run_result is None:
        crew_instance = SecurityRequirementsCrew()
        crew = crew_instance.crew()

    try:
        if full_run_result is None:
            # Run crew - it will use context from previous tasks
            result = crew.kickoff(inputs={"requirements_text": SAMPLE_REQUIREMENTS})
        else:
            # The whole crew was executed moments ago; reuse that run instead
            # of paying for (and possibly hanging on) a second one.
            result = full_run_result

        # Extract fifth task output
        if len(result.tasks_output) >= 5:
            task_result = result.tasks_output[4]  # Fifth task
            display_task_output(task_result, "Map Security Controls")
            save_task_output("map_security_controls", task_result)

            if task_result.pydantic:
                domain_output = task_result.pydantic
                print(f"\n‚úì Requirements mapped: {len(domain_output.requirements_mapping) if domain_output.requirements_mapping else 0}")

                # Count controls across all mapped requirements
                total_controls = 0
                if domain_output.requirements_mapping:
                    for rm in domain_output.requirements_mapping:
                        total_controls += len(rm.security_controls) if rm.security_controls else 0
                print(f"‚úì Total controls mapped: {total_controls}")
        else:
            print(f"‚ö†Ô∏è  Expected 5 tasks, got {len(result.tasks_output)}")
            print(f"   Completed tasks: {[t.name for t in result.tasks_output]}")
    except KeyboardInterrupt:
        # Caught explicitly so interrupting a hung run still prints guidance
        print("\n‚ö†Ô∏è  Task interrupted by user (KeyboardInterrupt)")
        print("   This suggests the task is taking too long or hanging")
        print("   Check Weaviate connection and tool configuration")
    except Exception as e:
        print(f"\n‚ùå Error during task execution: {e}")
        import traceback

        traceback.print_exc()

---

## Test Full Crew Execution

Run the entire crew from start to finish.


In [None]:
# Test full crew execution
# Imported here as well so this cell can be run directly after the setup and
# helper cells, without first executing the Task 5 debug cell.
from security_requirements_system.crew import SecurityRequirementsCrew

print("Testing Full Crew Execution...\n")
print("This will run all 9 tasks sequentially.\n")

crew_instance = SecurityRequirementsCrew()
crew = crew_instance.crew()

try:
    result = crew.kickoff(inputs={"requirements_text": SAMPLE_REQUIREMENTS})

    print(f"\n{'='*60}")
    print(f"CREW EXECUTION COMPLETE")
    print(f"{'='*60}\n")

    print(f"Total tasks completed: {len(result.tasks_output)}\n")

    # Expected task order; used for cache file names and as display fallback
    task_names = [
        "analyze_requirements",
        "analyze_architecture",
        "analyze_stakeholders_and_compliance",
        "perform_threat_modeling",
        "map_security_controls",
        "identify_ai_security_requirements",
        "design_security_architecture",
        "create_implementation_and_testing_plan",
        "validate_security_requirements",
    ]

    for i, task in enumerate(result.tasks_output):
        # Use the task's own name when it is set; fall back to the expected
        # name for that position (or a generic label) when the attribute is
        # missing *or* empty/None. hasattr() alone printed "None" in that case.
        fallback_name = task_names[i] if i < len(task_names) else f"Task {i+1}"
        task_name = getattr(task, "name", None) or fallback_name
        status = "‚úì" if hasattr(task, "pydantic") and task.pydantic else "‚ö†"
        print(f"{status} {i+1}. {task_name}")

        # Save each task output
        if i < len(task_names):
            save_task_output(task_names[i], task)

    # Show final validation (ninth task)
    if len(result.tasks_output) >= 9:
        validation_task = result.tasks_output[8]
        if validation_task.pydantic:
            from security_requirements_system.data_models import ValidationOutput

            validation_output = validation_task.pydantic
            print(f"\n{'='*60}")
            print(f"FINAL VALIDATION")
            print(f"{'='*60}")
            print(f"Overall Score: {validation_output.overall_score:.2f}")
            print(f"Validation Passed: {validation_output.validation_passed}")
            print(f"\nDimension Scores:")
            for dim, score in validation_output.dimension_scores.items():
                print(f"  - {dim}: {score:.2f}")

except KeyboardInterrupt:
    print("\n‚ö†Ô∏è  Execution interrupted by user")
    print("   Check which task was running when interrupted")
except Exception as e:
    print(f"\n‚ùå Error: {e}")
    import traceback

    traceback.print_exc()

In [None]:
# Test WeaviateQueryTool with different queries
# Calls the tool's _run() method directly, without going through a crew, so a
# failure here points at the tool or Weaviate rather than at an agent.
print("Testing WeaviateQueryTool with various queries...\n")

from security_requirements_system.tools.weaviate_tool import WeaviateQueryTool

tool = WeaviateQueryTool()

# (query text, result limit) pairs to try
test_queries = [
    ("authentication", 5),
    ("encryption", 5),
    ("access control", 5),
    ("input validation", 3),
]

for query, limit in test_queries:
    # Each query is isolated in its own try block so one failure does not stop
    # the remaining queries from being exercised.
    try:
        print(f"\nQuery: '{query}' (limit={limit})")
        result = tool._run(query=query, limit=limit)
        # NOTE(review): assumes _run returns a string (len() and slicing are
        # applied to it) - confirm against the tool implementation.
        print(f"‚úì Success: {len(result)} characters")
        print(f"   Preview: {result[:150]}...")
    except Exception as e:
        print(f"‚ùå Failed: {e}")
        import traceback

        traceback.print_exc()

In [None]:
# Show every cached task output file together with its size on disk
print("Cached Task Outputs:\n")

cached_files = list(TEST_OUTPUTS_DIR.glob("*_output.json"))
if not cached_files:
    print("  No cached outputs found")
    print(f"  Output directory: {TEST_OUTPUTS_DIR}")
else:
    for cache_path in sorted(cached_files):
        size_kb = cache_path.stat().st_size / 1024  # bytes -> KB
        print(f"  - {cache_path.name} ({size_kb:.2f} KB)")

In [None]:
# Inspect a specific cached task output
task_to_inspect = "map_security_controls"  # Change this to inspect different tasks

cached = load_task_output(task_to_inspect)
if not cached:
    print(f"No cached output found for {task_to_inspect}")
else:
    raw_text = cached.get("raw", "")
    print(f"Inspecting {task_to_inspect}:\n")
    print(f"Raw output length: {len(raw_text)} characters")

    if "pydantic" not in cached:
        # No structured data was saved; fall back to the raw text
        print("\nRaw Output (first 1000 chars):")
        print(raw_text[:1000])
    else:
        structured = cached["pydantic"]
        print(f"\nStructured output keys: {list(structured.keys())}")

        # Pretty print the structured output (truncated to 2000 characters)
        print("\nStructured Output:")
        print(json.dumps(structured, indent=2, default=str)[:2000])
else:
    print(f"No cached output found for {task_to_inspect}")