From 8a08d29551ba7b4ed91191e6e94eb6e1ed316a7f Mon Sep 17 00:00:00 2001 From: Ahmed Lekssays Date: Tue, 7 Oct 2025 19:53:54 +0300 Subject: [PATCH 1/3] Migrate to FastMCP and Change transport to Streamable-HTTP --- Dockerfile | 35 -- README.md | 397 +++++++++------------ build.sh | 4 +- examples/sample.c | 120 ------- main.py | 137 +++++-- pytest.ini | 17 - requirements.txt | 27 +- src/__init__.py | 8 +- src/config.py | 116 +++++- src/models.py | 205 +++++++---- src/server.py | 832 ------------------------------------------- test_client.py | 135 ------- tests/conftest.py | 102 ------ tests/test_config.py | 105 ------ tests/test_models.py | 119 ------- tests/test_server.py | 220 ------------ tests/test_utils.py | 62 ---- validate.py | 146 -------- 18 files changed, 545 insertions(+), 2242 deletions(-) delete mode 100644 Dockerfile delete mode 100644 examples/sample.c delete mode 100644 pytest.ini delete mode 100644 src/server.py delete mode 100644 test_client.py delete mode 100644 tests/conftest.py delete mode 100644 tests/test_config.py delete mode 100644 tests/test_models.py delete mode 100644 tests/test_server.py delete mode 100644 tests/test_utils.py delete mode 100755 validate.py diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index bb59045..0000000 --- a/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -# Ubuntu 24.04 base image -FROM ubuntu:24.04 - -# Install dependencies -RUN apt-get update && \ - apt-get install -y \ - openjdk-21-jdk \ - python3 \ - python3-pip \ - curl \ - unzip \ - wget \ - git \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -# Set JAVA_HOME for OpenJDK 21 -ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64 -ENV PATH=$JAVA_HOME/bin:$PATH - -# Download and install Joern via the official script -WORKDIR /opt -RUN curl -L https://github.com/joernio/joern/releases/download/v4.0.424/joern-install.sh -o joern-install.sh && \ - chmod +x joern-install.sh && \ - ./joern-install.sh && \ - rm joern-install.sh - -# Add Joern to PATH -ENV PATH="/opt/joern:$PATH" - -# Default working directory -WORKDIR /workspace - -# Default command: start Joern shell -CMD ["joern"] diff --git a/README.md b/README.md index 955c02f..559b37c 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,16 @@ # 🕷️ joern-mcp -A production-ready Model Context Protocol (MCP) server that provides AI assistants with static code analysis capabilities using Joern's Code Property Graph (CPG) technology. - -## Overview - -The Joern MCP Server enables AI coding assistants to perform sophisticated static code analysis by leveraging Joern's powerful CPG-based analysis in isolated Docker environments. It implements the Model Context Protocol standard, making it compatible with various AI assistants and development environments. +A Model Context Protocol (MCP) server that provides AI assistants with static code analysis capabilities using [Joern](https://joern.io)'s Code Property Graph (CPG) technology. 
## Features -- **Static Code Analysis**: Deep code analysis using Joern's CPG technology -- **Multi-Language Support**: C/C++, Java, JavaScript/TypeScript, Python, Go, Kotlin, Scala, C# -- **Isolated Execution**: All analysis runs in secure Docker containers -- **Intelligent Caching**: Efficient CPG caching with configurable TTL -- **GitHub Integration**: Direct analysis of GitHub repositories -- **Production Ready**: Comprehensive error handling, logging, and monitoring -- **MCP Compliance**: Full Model Context Protocol implementation +- **Multi-Language Support**: Java, C/C++, JavaScript, Python, Go, Kotlin, Swift +- **Docker Isolation**: Each analysis session runs in a secure container +- **GitHub Integration**: Analyze repositories directly from GitHub URLs +- **Session-Based**: Persistent CPG sessions with automatic cleanup +- **Redis-Backed**: Fast caching and session management +- **Async Queries**: Non-blocking CPG generation and query execution +- **Built-in Security Queries**: Pre-configured queries for common vulnerabilities ## Quick Start @@ -22,212 +18,174 @@ The Joern MCP Server enables AI coding assistants to perform sophisticated stati - Python 3.8+ - Docker +- Redis - Git ### Installation -1. **Clone the repository**: - ```bash - git clone https://github.com/Lekssays/joern-mcp.git - cd joern-mcp - ``` - -2. **Install dependencies**: - ```bash - pip install -r requirements.txt - ``` - -3. **Build Joern Docker image**: - ```bash - # Option 1: Use the build script (recommended) - ./build.sh - - # Option 2: Build manually - docker build -t joern:latest . - ``` - -### Running the Server - -**Validate setup first**: +1. **Clone and install dependencies**: ```bash -python validate.py +git clone https://github.com/Lekssays/joern-mcp.git +cd joern-mcp +pip install -r requirements.txt ``` -**Basic usage**: +2. **Setup (builds Joern image and starts Redis)**: ```bash -python main.py +./setup.sh ``` -**With configuration file**: +3. **Configure** (optional): ```bash -python main.py config.yml +cp config.example.yaml config.yaml +# Edit config.yaml as needed ``` -**Using environment variables**: +4. **Run the server**: ```bash -export JOERN_DOCKER_IMAGE=joern:latest -export JOERN_CACHE_DIR=/tmp/joern_cache -export GITHUB_TOKEN=your_token_here python main.py +# Server will be available at http://localhost:4242 ``` -> **Note**: The `joern:latest` image is built locally using the included Dockerfile, not pulled from a registry. 
- -## Configuration - -Create a `config.yml` file for custom configuration: - -```yaml -docker: - image: "joern:latest" - cpu_limit: "2" - memory_limit: "4g" - timeout: 300 - network_mode: "none" - -cache: - enabled: true - max_size_gb: 10 - ttl_hours: 24 - directory: "/tmp/joern_cache" - -max_concurrent_analyses: 3 -github_token: "your_github_token" # Optional, for private repos -log_level: "INFO" -``` - -### Environment Variables +## Integration with GitHub Copilot -| Variable | Description | Default | -|----------|-------------|---------| -| `JOERN_DOCKER_IMAGE` | Joern Docker image | `joern:latest` | -| `JOERN_CPU_LIMIT` | CPU limit for containers | `2` | -| `JOERN_MEMORY_LIMIT` | Memory limit for containers | `4g` | -| `JOERN_TIMEOUT` | Container timeout (seconds) | `300` | -| `JOERN_CACHE_ENABLED` | Enable CPG caching | `true` | -| `JOERN_CACHE_SIZE_GB` | Cache size limit (GB) | `10` | -| `JOERN_CACHE_DIR` | Cache directory | `/tmp/joern_cache` | -| `GITHUB_TOKEN` | GitHub access token | - | -| `JOERN_LOG_LEVEL` | Logging level | `INFO` | +The server uses **Streamable HTTP** transport for network accessibility and supports multiple concurrent clients. -## Usage with AI Assistants +Add to your VS Code `settings.json`: -### VS Code with GitHub Copilot - -Add to VS Code `settings.json`: ```json { - "servers": { - "joern-mcp": { - "type": "stdio", - "command": "python", - "args": [ - "/path/to/joern-mcp/main.py" - ] - } - }, - "inputs": [] + "github.copilot.advanced": { + "mcp": { + "servers": { + "joern-mcp": { + "url": "http://localhost:4242/mcp", + } + } + } + } } ``` -### Claude Desktop - -Configure in Claude Desktop settings: -```json -{ - "mcp": { - "servers": [{ - "name": "joern-mcp", - "command": ["python", "main.py"], - "workingDirectory": "/path/to/joern-mcp" - }] - } -} +Make sure the server is running before using it with Copilot: +```bash +python main.py ``` ## Available Tools ### Core Tools -- **`load_project`**: Load code from GitHub URL or local path -- **`generate_cpg`**: Generate Code Property Graph for analysis -- **`run_query`**: Execute Joern queries against the CPG -- **`list_projects`**: List all loaded projects -- **`project_info`**: Get detailed project information -- **`cleanup_project`**: Clean up project resources - -### Pre-built Queries - -- **`list_queries`**: Access security, quality, and metrics queries - -#### Security Queries -- SQL injection detection -- XSS sink identification -- Hardcoded secrets discovery -- Unsafe deserialization patterns - -#### Quality Queries -- Complex methods detection -- Long methods identification -- Duplicate code analysis -- Unused variables discovery +- **`create_cpg_session`**: Initialize analysis session from local path or GitHub URL +- **`run_cpgql_query`**: Execute synchronous CPGQL queries with JSON output +- **`run_cpgql_query_async`**: Execute asynchronous queries with status tracking +- **`get_session_status`**: Check session state and metadata +- **`list_sessions`**: View active sessions with filtering +- **`close_session`**: Clean up session resources +- **`list_queries`**: Get pre-built security and quality queries -#### Metrics Queries -- Total methods/classes/files count -- Average cyclomatic complexity - -## Example Usage - -### Load and Analyze a Project +### Example Usage ```python -# Example MCP client interaction +# Create session from GitHub { - "tool": "load_project", + "tool": "create_cpg_session", "arguments": { - "source": "https://github.com/user/repo", - "branch": "main" + "source_type": "github", 
+ "source_path": "https://github.com/user/repo", + "language": "java" } } +# Run query { - "tool": "generate_cpg", + "tool": "run_cpgql_query", "arguments": { - "project_id": "abc12345" + "session_id": "abc-123-def", + "query": "cpg.method.name.l" } } +``` -{ - "tool": "run_query", - "arguments": { - "project_id": "abc12345", - "query": "cpg.method.filter(_.cyclomaticComplexity > 10)" - } -} +### Pre-Built Queries + +The `list_queries` tool provides 20+ pre-configured queries including: + +**Security:** +- SQL injection detection +- XSS vulnerabilities +- Hardcoded secrets +- Command injection +- Path traversal + +**Memory Safety:** +- Buffer overflow risks +- Memory leaks +- Null pointer dereferences +- Uninitialized variables + +**Code Quality:** +- All methods/functions +- Control structures +- Function calls +- String literals + +## Configuration + +Key settings in `config.yaml`: + +```yaml +server: + host: 0.0.0.0 + port: 4242 + log_level: INFO + +redis: + host: localhost + port: 6379 + +sessions: + ttl: 3600 # Session timeout (seconds) + max_concurrent: 50 # Max concurrent sessions + +cpg: + generation_timeout: 600 # CPG generation timeout (seconds) + supported_languages: [java, c, cpp, javascript, python, go, kotlin, swift] ``` -### Common Queries +Environment variables override config file settings (e.g., `MCP_HOST`, `REDIS_HOST`, `SESSION_TTL`). + +## Example CPGQL Queries -**Find all functions**: +**Find all methods:** ```scala -cpg.method.l +cpg.method.name.l ``` -**Find function calls**: +**Find hardcoded secrets:** ```scala -cpg.call.l +cpg.literal.code("(?i).*(password|secret|api_key).*").l ``` -**Security analysis**: +**Find SQL injection risks:** ```scala -cpg.call.name(".*exec.*").code +cpg.call.name(".*execute.*").where(_.argument.isLiteral.code(".*SELECT.*")).l ``` -**Complex methods**: +**Find complex methods:** ```scala -cpg.method.filter(_.cyclomaticComplexity > 10) +cpg.method.filter(_.cyclomaticComplexity > 10).l ``` +## Architecture + +- **FastMCP Server**: Built on FastMCP 2.12.4 framework with **Streamable HTTP** transport +- **HTTP Transport**: Network-accessible API supporting multiple concurrent clients +- **Docker Containers**: One isolated Joern container per session +- **Redis**: Session state and query result caching +- **Async Processing**: Non-blocking CPG generation +- **CPG Caching**: Reuse CPGs for identical source/language combinations + ## Development ### Project Structure @@ -235,97 +193,85 @@ cpg.method.filter(_.cyclomaticComplexity > 10) ``` joern-mcp/ ├── src/ -│ ├── __init__.py -│ ├── server.py # Main server implementation -│ ├── models.py # Data models and exceptions -│ ├── utils.py # Utility functions -│ └── config.py # Configuration management -├── tests/ -│ ├── conftest.py # Test configuration -│ ├── test_server.py # Server integration tests -│ ├── test_models.py # Model unit tests -│ └── test_utils.py # Utility function tests -├── examples/ -│ └── sample.c # Sample code for testing -├── main.py # Entry point -├── test_client.py # Simple test client -├── validate.py # Setup validation script -├── requirements.txt # Dependencies -├── Dockerfile # Joern Docker image -├── build.sh # Docker build script -└── README.md +│ ├── services/ # Session, Docker, Git, CPG, Query services +│ ├── tools/ # MCP tool definitions +│ ├── utils/ # Redis, logging, validators +│ └── models.py # Data models +├── playground/ # Test codebases and CPGs +├── main.py # Server entry point +├── config.yaml # Configuration +└── requirements.txt # Dependencies ``` ### 
Running Tests -**Run all tests**: ```bash +# Install dev dependencies +pip install -r requirements.txt + +# Run tests pytest -``` -**Run with coverage**: -```bash +# Run with coverage pytest --cov=src --cov-report=html ``` -**Run integration tests** (requires Docker): -```bash -pytest -m integration -``` - -**Run specific test file**: -```bash -pytest tests/test_server.py -``` - ### Code Quality -**Format code**: ```bash +# Format black src/ tests/ isort src/ tests/ -``` -**Lint code**: -```bash +# Lint flake8 src/ tests/ mypy src/ ``` ## Troubleshooting -### Common Issues +**Setup issues:** +```bash +# Re-run setup to rebuild and restart services +./setup.sh +``` + +**Docker issues:** +```bash +# Verify Docker is running +docker ps + +# Check Joern image +docker images | grep joern -**Docker connection error**: -- Ensure Docker is running -- Check Docker daemon accessibility -- Verify user permissions for Docker socket +# Check Redis container +docker ps | grep joern-redis +``` -**Image not found**: -- Build the Joern image: `docker build -t joern:latest .` -- Check image name in configuration -- Verify the build completed successfully: `docker images | grep joern` +**Redis connection issues:** +```bash +# Test Redis connection +docker exec joern-redis redis-cli ping -**Docker build issues**: -- Ensure Docker has sufficient disk space -- Check internet connectivity for downloading Joern -- Try building with more verbose output: `docker build -t joern:latest . --progress=plain` +# Check Redis logs +docker logs joern-redis -**Memory issues**: -- Increase Docker memory limit in config -- Reduce concurrent analysis limit -- Clear cache directory +# Restart Redis +docker restart joern-redis +``` -**Permission errors**: -- Check file/directory permissions -- Ensure cache directory is writable -- Verify Docker socket permissions +**Server connectivity:** +```bash +# Test server is running +curl http://localhost:4242/health -### Logging +# Check server logs for errors +python main.py +``` -Enable debug logging for troubleshooting: +**Debug logging:** ```bash -export JOERN_LOG_LEVEL=DEBUG +export MCP_LOG_LEVEL=DEBUG python main.py ``` @@ -334,16 +280,15 @@ python main.py 1. Fork the repository 2. Create a feature branch: `git checkout -b feature-name` 3. Make changes and add tests -4. Run tests and linting: `pytest && black . && flake8` -5. Commit changes: `git commit -am 'Add feature'` -6. Push to branch: `git push origin feature-name` -7. Submit a pull request +4. Run tests: `pytest && black . && flake8` +5. Submit a pull request ## License -MIT License - see LICENSE file for details. +MIT License - see [LICENSE](LICENSE) file for details. ## Acknowledgments - [Joern](https://github.com/joernio/joern) - Static analysis platform -- [Model Context Protocol](https://modelcontextprotocol.io/) - AI assistant integration standard +- [FastMCP](https://github.com/jlowin/fastmcp) - MCP framework +- [Model Context Protocol](https://modelcontextprotocol.io/) - MCP specification diff --git a/build.sh b/build.sh index 1565480..727e33c 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Build script for Joern MCP Server +# Build script for Joern Docker image set -e @@ -13,7 +13,7 @@ fi # Build the image echo "📦 Building joern:latest image..." -docker build -t joern:latest . --progress=plain +docker build -f Dockerfile.joern -t joern:latest . 
--progress=plain # Verify the image was built if docker images | grep -q "joern.*latest"; then diff --git a/examples/sample.c b/examples/sample.c deleted file mode 100644 index 4040937..0000000 --- a/examples/sample.c +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include -#include -#include - -#define TICK_NSEC 1000000 -#define NSEC_PER_SEC 1000000000 - -struct buffer { - char *data; - size_t size; - size_t capacity; -}; - -int initialize_buffer(struct buffer *buf, size_t initial_capacity) { - buf->data = (char *)malloc(initial_capacity); - if (!buf->data) return -1; - buf->size = 0; - buf->capacity = initial_capacity; - return 0; -} - -void cleanup_buffer(struct buffer *buf) { - if (buf->data) { - free(buf->data); - buf->data = NULL; - } - buf->size = 0; - buf->capacity = 0; -} - -int resize_buffer(struct buffer *buf, size_t new_capacity) { - char *new_data = (char *)realloc(buf->data, new_capacity); - if (!new_data) return -1; - buf->data = new_data; - buf->capacity = new_capacity; - return 0; -} - -int append_to_buffer(struct buffer *buf, const char *data, size_t len) { - if (buf->size + len > buf->capacity) { - size_t new_capacity = (buf->capacity == 0) ? 1024 : buf->capacity * 2; - while (new_capacity < buf->size + len) { - new_capacity *= 2; - } - if (resize_buffer(buf, new_capacity) != 0) { - return -1; - } - } - memcpy(buf->data + buf->size, data, len); - buf->size += len; - return 0; -} - -int safe_div_u64_rem(uint64_t dividend, uint64_t divisor, uint32_t *remainder) { - if (divisor == 0) { - return -1; - } - *remainder = (uint32_t)(dividend % divisor); - return 0; -} - -void jiffies_to_timespec(const unsigned long jiffies, struct timespec *value) { - uint32_t rem; - uint64_t result = (uint64_t)jiffies * TICK_NSEC; - if (safe_div_u64_rem(result, NSEC_PER_SEC, &rem) != 0) { - value->tv_sec = 0; - value->tv_nsec = 0; - return; - } - value->tv_sec = (int64_t)(result / NSEC_PER_SEC); - value->tv_nsec = (int64_t)rem; -} - -int process_jiffies(unsigned long jiffies, struct timespec *output) { - if (!output) return -1; - jiffies_to_timespec(jiffies, output); - return 0; -} - -int validate_and_convert(struct buffer *input_buf, struct timespec *output) { - if (!input_buf || !output) return -1; - unsigned long jiffies = 0; - if (input_buf->size > 0) { - char *tmp = (char *)malloc(input_buf->size + 1); - if (!tmp) return -1; - memcpy(tmp, input_buf->data, input_buf->size); - tmp[input_buf->size] = '\0'; - char *endptr; - jiffies = strtoul(tmp, &endptr, 10); - if (*endptr != '\0') { - free(tmp); - return -1; - } - free(tmp); - } - return process_jiffies(jiffies, output); -} - -int main() { - struct buffer input_buf; - struct timespec ts; - if (initialize_buffer(&input_buf, 1024) != 0) { - return 1; - } - const char *test_input = "1000"; - if (append_to_buffer(&input_buf, test_input, strlen(test_input)) != 0) { - cleanup_buffer(&input_buf); - return 1; - } - if (validate_and_convert(&input_buf, &ts) != 0) { - cleanup_buffer(&input_buf); - return 1; - } - printf("Seconds: %ld\n", ts.tv_sec); - printf("Nanoseconds: %ld\n", ts.tv_nsec); - cleanup_buffer(&input_buf); - return 0; -} \ No newline at end of file diff --git a/main.py b/main.py index 524ac1d..5254b0e 100644 --- a/main.py +++ b/main.py @@ -1,46 +1,129 @@ #!/usr/bin/env python3 """ -Joern MCP Server - Main entry point +Joern MCP Server - Main entry point using FastMCP This is the main entry point for the Joern MCP Server that provides static code analysis -capabilities using Joern's Code Property Graph (CPG) technology. 
+capabilities using Joern's Code Property Graph (CPG) technology with interactive shells. """ import asyncio -import sys -from pathlib import Path +import logging +from contextlib import asynccontextmanager + +from fastmcp import FastMCP -from src.server import JoernMCPServer from src.config import load_config +from src.services import ( + SessionManager, + GitManager, + CPGGenerator, + QueryExecutor, + DockerOrchestrator +) +from src.utils import RedisClient, setup_logging +from src.tools import register_tools + +# Global service instances +services = {} + +logger = logging.getLogger(__name__) -def main(): - """Main entry point for the Joern MCP Server""" - config_path = None - if len(sys.argv) > 1: - config_path = sys.argv[1] - if not Path(config_path).exists(): - print(f"Error: Configuration file not found: {config_path}") - sys.exit(1) +@asynccontextmanager +async def lifespan(mcp: FastMCP): + """Startup and shutdown logic for the FastMCP server""" + # Load configuration + config = load_config("config.yaml") + setup_logging(config.server.log_level) + logger.info("Starting Joern MCP Server") + + # Ensure required directories exist + import os + os.makedirs(config.storage.workspace_root, exist_ok=True) + os.makedirs("playground/cpgs", exist_ok=True) + logger.info("Created required directories") try: - # Load configuration - config = load_config(config_path) + # Initialize Redis + redis_client = RedisClient(config.redis) + await redis_client.connect() + logger.info("Redis client connected") + + # Initialize services + services['config'] = config + services['redis'] = redis_client + services['session_manager'] = SessionManager(redis_client, config.sessions) + services['git_manager'] = GitManager(config.storage.workspace_root) + services['cpg_generator'] = CPGGenerator(config.cpg, services['session_manager']) + + # Initialize Docker orchestrator + services['docker'] = DockerOrchestrator() + await services['docker'].initialize() + + # Set up Docker cleanup callback for session manager + services['session_manager'].set_docker_cleanup_callback( + services['docker'].stop_container + ) + + # Initialize CPG generator + await services['cpg_generator'].initialize() + + # Initialize query executor with reference to CPG generator + services['query_executor'] = QueryExecutor( + config.query, + config.joern, + redis_client, + services['cpg_generator'] + ) + + # Initialize query executor + await services['query_executor'].initialize() + + logger.info("All services initialized") + logger.info("Joern MCP Server is ready") - # Create and run server - server = JoernMCPServer(config) - asyncio.run(server.run()) + yield + + # Shutdown + logger.info("Shutting down Joern MCP Server") + + # Cleanup query executor sessions + await services['query_executor'].cleanup() + + # Cleanup Docker containers + await services['docker'].cleanup() + + # Close connections + await redis_client.close() + + logger.info("Joern MCP Server shutdown complete") - except KeyboardInterrupt: - print("\nShutting down Joern MCP Server...") - sys.exit(0) except Exception as e: - import traceback - print(f"Error starting server: {e}") - print("Full traceback:") - traceback.print_exc() - sys.exit(1) + logger.error(f"Error during server lifecycle: {e}", exc_info=True) + raise + + +# Initialize FastMCP server +mcp = FastMCP( + "Joern MCP Server", + dependencies=["fastmcp>=2.12.0", "redis", "gitpython", "pyyaml"], + lifespan=lifespan +) + +# Register MCP tools +register_tools(mcp, services) if __name__ == "__main__": - main() \ No newline at end 
of file + # Run the server with HTTP transport (Streamable HTTP) + # Get configuration + config_data = load_config("config.yaml") + host = config_data.server.host + port = config_data.server.port + + logger.info(f"Starting Joern MCP Server with HTTP transport on {host}:{port}") + + # Use HTTP transport (Streamable HTTP) for production deployment + # This enables network accessibility, multiple concurrent clients, + # and integration with web infrastructure + mcp.run(transport="http", host=host, port=port) \ No newline at end of file diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 134764d..0000000 --- a/pytest.ini +++ /dev/null @@ -1,17 +0,0 @@ -[pytest] -testpaths = tests -python_files = test_*.py -python_classes = Test* -python_functions = test_* -pythonpath = . -addopts = - --strict-markers - --disable-warnings - --verbose -markers = - integration: marks tests that require Docker and external services - slow: marks tests as slow (deselect with '-m "not slow"') - unit: marks tests as unit tests -filterwarnings = - ignore::UserWarning - ignore::DeprecationWarning \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b56967c..1b1c7f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,19 @@ # Core dependencies -pydantic==2.8.2 -mcp==1.0.0 +fastmcp>=2.12.4 +mcp>=1.16.0 +httpx>=0.28.1 +uvicorn[standard]==0.34.0 +pydantic==2.11.10 docker==7.1.0 -gitpython==3.1.43 -PyYAML==6.0.2 -aiohttp==3.10.5 +gitpython==3.1.45 +PyYAML==6.0.3 +redis==6.4.0 # Development dependencies -pytest==7.4.3 -pytest-asyncio==0.21.1 -pytest-mock==3.12.0 -black==23.9.1 -isort==5.12.0 -flake8==6.1.0 -mypy==1.6.1 \ No newline at end of file +pytest==8.4.2 +pytest-asyncio==1.2.0 +pytest-mock==3.15.1 +black==25.9.0 +isort==6.1.0 +flake8==7.3.0 +mypy==1.18.2 \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py index f62a493..43a2259 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,9 +1,7 @@ """Joern MCP Server - A Model Context Protocol server for static code analysis using Joern.""" -__version__ = "1.0.0" -__author__ = "Joern MCP Team" -__email__ = "contact@joern-mcp.dev" - -from .server import JoernMCPServer +__version__ = "0.2.0" +__author__ = "Ahmed Lekssays" +__email__ = "ahmed@lekssays.com" __all__ = ["JoernMCPServer"] \ No newline at end of file diff --git a/src/config.py b/src/config.py index 4fdab87..8f0b326 100644 --- a/src/config.py +++ b/src/config.py @@ -1,32 +1,114 @@ """Configuration management for the Joern MCP Server.""" import os +import yaml from typing import Optional +from pathlib import Path -from .models import ServerConfig, DockerConfig, CacheConfig +from .models import ( + Config, + ServerConfig, + RedisConfig, + SessionConfig, + CPGConfig, + QueryConfig, + StorageConfig, + JoernConfig +) -def load_config(config_path: Optional[str] = None) -> ServerConfig: +def load_config(config_path: Optional[str] = None) -> Config: """Load configuration from file or environment variables""" if config_path and os.path.exists(config_path): - import yaml with open(config_path, 'r') as f: config_data = yaml.safe_load(f) - return ServerConfig(**config_data) + # Process environment variable substitutions + config_data = _substitute_env_vars(config_data) + return _dict_to_config(config_data) else: # Load from environment variables - return ServerConfig( - docker=DockerConfig( - image=os.getenv("JOERN_DOCKER_IMAGE", "joern:latest"), - cpu_limit=os.getenv("JOERN_CPU_LIMIT", "2"), - memory_limit=os.getenv("JOERN_MEMORY_LIMIT", "4g"), 
- timeout=int(os.getenv("JOERN_TIMEOUT", "300")) + return Config( + server=ServerConfig( + host=os.getenv("MCP_HOST", "0.0.0.0"), + port=int(os.getenv("MCP_PORT", "4242")), + log_level=os.getenv("MCP_LOG_LEVEL", "INFO") ), - cache=CacheConfig( - enabled=os.getenv("JOERN_CACHE_ENABLED", "true").lower() == "true", - max_size_gb=int(os.getenv("JOERN_CACHE_SIZE_GB", "10")), - directory=os.getenv("JOERN_CACHE_DIR", "/tmp/joern_cache") + redis=RedisConfig( + host=os.getenv("REDIS_HOST", "localhost"), + port=int(os.getenv("REDIS_PORT", "6379")), + password=os.getenv("REDIS_PASSWORD"), + db=int(os.getenv("REDIS_DB", "0")) ), - github_token=os.getenv("GITHUB_TOKEN"), - log_level=os.getenv("JOERN_LOG_LEVEL", "INFO") - ) \ No newline at end of file + joern=JoernConfig( + binary_path=os.getenv("JOERN_BINARY_PATH", "joern"), + memory_limit=os.getenv("JOERN_MEMORY_LIMIT", "4g") + ), + sessions=SessionConfig( + ttl=int(os.getenv("SESSION_TTL", "3600")), + idle_timeout=int(os.getenv("SESSION_IDLE_TIMEOUT", "1800")), + max_concurrent=int(os.getenv("MAX_CONCURRENT_SESSIONS", "10")) + ), + cpg=CPGConfig( + generation_timeout=int(os.getenv("CPG_GENERATION_TIMEOUT", "600")), + max_repo_size_mb=int(os.getenv("MAX_REPO_SIZE_MB", "500")) + ), + query=QueryConfig( + timeout=int(os.getenv("QUERY_TIMEOUT", "30")), + cache_enabled=os.getenv("QUERY_CACHE_ENABLED", "true").lower() == "true", + cache_ttl=int(os.getenv("QUERY_CACHE_TTL", "300")) + ), + storage=StorageConfig( + workspace_root=os.getenv("WORKSPACE_ROOT", "/tmp/joern-mcp"), + cleanup_on_shutdown=os.getenv("CLEANUP_ON_SHUTDOWN", "true").lower() == "true" + ) + ) + + +def _substitute_env_vars(data): + """Recursively substitute environment variables in config""" + if isinstance(data, dict): + return {k: _substitute_env_vars(v) for k, v in data.items()} + elif isinstance(data, list): + return [_substitute_env_vars(item) for item in data] + elif isinstance(data, str) and data.startswith("${") and data.endswith("}"): + env_var = data[2:-1] + default = None + if ":" in env_var: + env_var, default = env_var.split(":", 1) + return os.getenv(env_var, default) + return data + + +def _dict_to_config(data: dict) -> Config: + """Convert dictionary to Config object with proper type conversions""" + # Helper function to convert values based on dataclass field types + def convert_config_section(config_class, values): + if not values: + return config_class() + converted = {} + for field_name, field_type in config_class.__annotations__.items(): + if field_name in values: + value = values[field_name] + # Handle type conversions + if field_type == int or (hasattr(field_type, '__origin__') and field_type.__origin__ == int): + converted[field_name] = int(value) if value is not None else None + elif field_type == float or (hasattr(field_type, '__origin__') and field_type.__origin__ == float): + converted[field_name] = float(value) if value is not None else None + elif field_type == bool or (hasattr(field_type, '__origin__') and field_type.__origin__ == bool): + if isinstance(value, str): + converted[field_name] = value.lower() in ('true', '1', 'yes') + else: + converted[field_name] = bool(value) + else: + converted[field_name] = value + return config_class(**converted) + + return Config( + server=convert_config_section(ServerConfig, data.get("server", {})), + redis=convert_config_section(RedisConfig, data.get("redis", {})), + joern=convert_config_section(JoernConfig, data.get("joern", {})), + sessions=convert_config_section(SessionConfig, data.get("sessions", {})), + 
cpg=convert_config_section(CPGConfig, data.get("cpg", {})), + query=convert_config_section(QueryConfig, data.get("query", {})), + storage=convert_config_section(StorageConfig, data.get("storage", {})) + ) \ No newline at end of file diff --git a/src/models.py b/src/models.py index f4e0459..bc53d78 100644 --- a/src/models.py +++ b/src/models.py @@ -1,75 +1,160 @@ -"""Data models for the Joern MCP Server.""" +""" +Data models for Joern MCP Server +""" +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional, Dict, Any, List +from enum import Enum -import time -from pathlib import Path -from typing import Any, Dict, List, Optional -from pydantic import BaseModel, Field +class SessionStatus(str, Enum): + """Session status enumeration""" + INITIALIZING = "initializing" + GENERATING = "generating" + READY = "ready" + ERROR = "error" -# Configuration Models -class DockerConfig(BaseModel): - """Docker configuration settings""" - image: str = "joern:latest" - cpu_limit: Optional[str] = "2" - memory_limit: str = "4g" - timeout: int = 300 # seconds - network_mode: str = "none" # isolated by default - -class CacheConfig(BaseModel): - """Cache configuration settings""" - enabled: bool = True - max_size_gb: int = 10 - ttl_hours: int = 24 - directory: str = "/tmp/joern_cache" - -class ServerConfig(BaseModel): - """Main server configuration""" - docker: DockerConfig = DockerConfig() - cache: CacheConfig = CacheConfig() - max_concurrent_analyses: int = 3 - supported_languages: List[str] = [ - "c", "cpp", "java", "javascript", "typescript", - "python", "go", "kotlin", "scala", "csharp" - ] - github_token: Optional[str] = None - log_level: str = "INFO" +class SourceType(str, Enum): + """Source type enumeration""" + LOCAL = "local" + GITHUB = "github" + -# Data Models -class ProjectInfo(BaseModel): - """Information about a loaded project""" +@dataclass +class Session: + """CPG session data model""" id: str - source_type: str # "github" or "local" - source_path: str - languages: List[str] = [] - size_loc: Optional[int] = None - cpg_generated: bool = False + container_id: Optional[str] = None + source_type: str = "" + source_path: str = "" + language: str = "" + status: str = SessionStatus.INITIALIZING.value cpg_path: Optional[str] = None - created_at: float = Field(default_factory=time.time) - last_accessed: float = Field(default_factory=time.time) + created_at: datetime = field(default_factory=datetime.utcnow) + last_accessed: datetime = field(default_factory=datetime.utcnow) + error_message: Optional[str] = None + metadata: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + """Convert session to dictionary""" + return { + "id": self.id, + "container_id": self.container_id, + "source_type": self.source_type, + "source_path": self.source_path, + "language": self.language, + "status": self.status, + "cpg_path": self.cpg_path, + "created_at": self.created_at.isoformat(), + "last_accessed": self.last_accessed.isoformat(), + "error_message": self.error_message, + "metadata": self.metadata + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'Session': + """Create session from dictionary""" + return cls( + id=data["id"], + container_id=data.get("container_id"), + source_type=data.get("source_type", ""), + source_path=data.get("source_path", ""), + language=data.get("language", ""), + status=data.get("status", SessionStatus.INITIALIZING.value), + cpg_path=data.get("cpg_path"), + 
created_at=datetime.fromisoformat(data["created_at"]), + last_accessed=datetime.fromisoformat(data["last_accessed"]), + error_message=data.get("error_message"), + metadata=data.get("metadata", {}) + ) + -class QueryResult(BaseModel): - """Result of a Joern query execution""" - query: str +@dataclass +class QueryResult: + """Query execution result""" success: bool - results: List[Dict[str, Any]] = [] + data: Optional[List[Dict[str, Any]]] = None error: Optional[str] = None execution_time: float = 0.0 - timestamp: float = Field(default_factory=time.time) + row_count: int = 0 + + def to_dict(self) -> Dict[str, Any]: + """Convert result to dictionary""" + return { + "success": self.success, + "data": self.data, + "error": self.error, + "execution_time": self.execution_time, + "row_count": self.row_count + } + + +@dataclass +class JoernConfig: + """Joern configuration""" + binary_path: str = "joern" + memory_limit: str = "4g" + + +@dataclass +class ServerConfig: + """Server configuration""" + host: str = "0.0.0.0" + port: int = 4242 + log_level: str = "INFO" + + +@dataclass +class RedisConfig: + """Redis configuration""" + host: str = "localhost" + port: int = 6379 + password: Optional[str] = None + db: int = 0 + decode_responses: bool = True + + +@dataclass +class SessionConfig: + """Session management configuration""" + ttl: int = 3600 # 1 hour + idle_timeout: int = 1800 # 30 minutes + max_concurrent: int = 100 + + +@dataclass +class CPGConfig: + """CPG generation configuration""" + generation_timeout: int = 600 # 10 minutes + max_repo_size_mb: int = 500 + supported_languages: List[str] = field(default_factory=lambda: [ + "java", "c", "cpp", "javascript", "python", "go", "kotlin" + ]) + + +@dataclass +class QueryConfig: + """Query execution configuration""" + timeout: int = 30 + cache_enabled: bool = True + cache_ttl: int = 300 # 5 minutes -# Exception Classes -class JoernMCPError(Exception): - """Base exception for Joern MCP Server""" - pass -class ProjectLoadError(JoernMCPError): - """Error loading project""" - pass +@dataclass +class StorageConfig: + """Storage configuration""" + workspace_root: str = "/tmp/joern-mcp" + cleanup_on_shutdown: bool = True -class CPGGenerationError(JoernMCPError): - """Error generating CPG""" - pass -class QueryExecutionError(JoernMCPError): - """Error executing query""" - pass \ No newline at end of file +@dataclass +class Config: + """Main configuration""" + server: ServerConfig = field(default_factory=ServerConfig) + redis: RedisConfig = field(default_factory=RedisConfig) + joern: JoernConfig = field(default_factory=JoernConfig) + sessions: SessionConfig = field(default_factory=SessionConfig) + cpg: CPGConfig = field(default_factory=CPGConfig) + query: QueryConfig = field(default_factory=QueryConfig) + storage: StorageConfig = field(default_factory=StorageConfig) \ No newline at end of file diff --git a/src/server.py b/src/server.py deleted file mode 100644 index f7f344a..0000000 --- a/src/server.py +++ /dev/null @@ -1,832 +0,0 @@ -#!/usr/bin/env python3 -""" -Joern MCP Server - A Model Context Protocol server for static code analysis using Joern - -This server provides AI assistants with the ability to perform static code analysis -using Joern's Code Property Graph (CPG) technology in isolated Docker environments. 
-""" - -import asyncio -import json -import logging -import os -import shutil -import tempfile -import time -from pathlib import Path -from typing import Any, Dict, List, Optional -from urllib.parse import urlparse -import hashlib - -import docker -from docker.errors import DockerException, ContainerError, ImageNotFound -import git -from git.exc import GitError -from pydantic import BaseModel, Field -from mcp.server.models import InitializationOptions -from mcp.server import Server -from mcp.types import Tool, TextContent - -from .models import ( - ServerConfig, ProjectInfo, QueryResult, - JoernMCPError, ProjectLoadError, CPGGenerationError, QueryExecutionError -) -from .utils import detect_project_language, calculate_loc - - -class JoernMCPServer: - """Main Joern MCP Server implementation""" - - def __init__(self, config: Optional[ServerConfig] = None): - self.config = config or ServerConfig() - self.docker_client = None - self.projects: Dict[str, ProjectInfo] = {} - self.server = Server("joern-mcp-server") - self.logger = self._setup_logging() - self.cache_dir = Path(self.config.cache.directory) - self.cache_dir.mkdir(parents=True, exist_ok=True) - self._setup_handlers() - self._docker_initialized = False - - def _setup_logging(self) -> logging.Logger: - """Setup logging configuration""" - logging.basicConfig( - level=getattr(logging, self.config.log_level.upper()), - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' - ) - return logging.getLogger(__name__) - - def _setup_handlers(self): - """Setup MCP server handlers""" - - @self.server.list_tools() - async def handle_list_tools() -> List[Tool]: - """List available tools""" - return [ - Tool( - name="load_project", - description="Load a project from GitHub URL or local path", - inputSchema={ - "type": "object", - "properties": { - "source": { - "type": "string", - "description": "GitHub URL or local file path" - }, - "branch": { - "type": "string", - "description": "Git branch/tag/commit (for GitHub sources)" - } - }, - "required": ["source"] - } - ), - Tool( - name="generate_cpg", - description="Generate Code Property Graph for a loaded project", - inputSchema={ - "type": "object", - "properties": { - "project_id": { - "type": "string", - "description": "ID of the loaded project" - }, - "language": { - "type": "string", - "description": "Override auto-detected language" - } - }, - "required": ["project_id"] - } - ), - Tool( - name="run_query", - description="Execute a Joern query against a project's CPG", - inputSchema={ - "type": "object", - "properties": { - "project_id": { - "type": "string", - "description": "ID of the project with generated CPG" - }, - "query": { - "type": "string", - "description": "Joern query to execute" - }, - "format": { - "type": "string", - "enum": ["json", "csv", "table"], - "default": "json", - "description": "Output format for results" - } - }, - "required": ["project_id", "query"] - } - ), - Tool( - name="list_projects", - description="List all loaded projects", - inputSchema={ - "type": "object", - "properties": {} - } - ), - Tool( - name="project_info", - description="Get detailed information about a project", - inputSchema={ - "type": "object", - "properties": { - "project_id": { - "type": "string", - "description": "ID of the project" - } - }, - "required": ["project_id"] - } - ), - Tool( - name="list_queries", - description="List available pre-built security and quality queries", - inputSchema={ - "type": "object", - "properties": { - "category": { - "type": "string", - "enum": 
["security", "quality", "metrics", "taint_analysis", "function_search", "reachability", "all"], - "default": "all" - } - } - } - ), - Tool( - name="cleanup_project", - description="Clean up project resources and remove from memory", - inputSchema={ - "type": "object", - "properties": { - "project_id": { - "type": "string", - "description": "ID of the project to cleanup" - } - }, - "required": ["project_id"] - } - ) - ] - - @self.server.call_tool() - async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]: - """Handle tool calls""" - try: - if name == "load_project": - result = await self.load_project(**arguments) - elif name == "generate_cpg": - result = await self.generate_cpg(**arguments) - elif name == "run_query": - result = await self.run_query(**arguments) - elif name == "list_projects": - result = await self.list_projects() - elif name == "project_info": - result = await self.project_info(**arguments) - elif name == "list_queries": - result = await self.list_queries(**arguments) - elif name == "cleanup_project": - result = await self.cleanup_project(**arguments) - else: - raise ValueError(f"Unknown tool: {name}") - - return [TextContent(type="text", text=json.dumps(result, indent=2))] - - except Exception as e: - self.logger.error(f"Error in tool {name}: {str(e)}", exc_info=True) - error_result = { - "success": False, - "error": str(e), - "tool": name, - "arguments": arguments - } - return [TextContent(type="text", text=json.dumps(error_result, indent=2))] - - async def _ensure_docker_initialized(self): - """Ensure Docker is initialized before operations that need it""" - if not self._docker_initialized: - await self.initialize_docker() - self._docker_initialized = True - - async def initialize_docker(self): - """Initialize Docker client""" - try: - self.docker_client = docker.from_env() - # Test connection - run in executor to avoid blocking - loop = asyncio.get_event_loop() - await loop.run_in_executor(None, self.docker_client.ping) - self.logger.info("Docker client initialized successfully") - - # Ensure Joern image is available - await self._ensure_joern_image() - - except DockerException as e: - self.logger.error(f"Failed to initialize Docker client: {e}") - raise JoernMCPError(f"Docker initialization failed: {e}") - except Exception as e: - self.logger.error(f"Unexpected error during Docker initialization: {e}") - raise JoernMCPError(f"Docker initialization failed: {e}") - - async def _ensure_joern_image(self): - """Ensure Joern Docker image is available""" - try: - # Run in executor to avoid blocking the async loop - loop = asyncio.get_event_loop() - await loop.run_in_executor(None, self._check_and_pull_image) - except Exception as e: - self.logger.error(f"Failed to ensure Joern image: {e}") - raise JoernMCPError(f"Docker image preparation failed: {e}") - - def _check_and_pull_image(self): - """Check and pull Joern image (synchronous helper)""" - try: - self.docker_client.images.get(self.config.docker.image) - self.logger.info(f"Joern image {self.config.docker.image} found") - except ImageNotFound: - self.logger.info(f"Pulling Joern image {self.config.docker.image}...") - self.docker_client.images.pull(self.config.docker.image) - self.logger.info("Joern image pulled successfully") - - def _generate_project_id(self, source: str) -> str: - """Generate unique project ID from source""" - return hashlib.md5(source.encode()).hexdigest()[:8] - - async def load_project(self, source: str, branch: Optional[str] = None) -> Dict[str, Any]: - """Load project 
from GitHub or local path""" - self.logger.info(f"Loading project from: {source}") - - project_id = self._generate_project_id(source) - temp_dir = None - - try: - # Determine source type - if source.startswith(('http://', 'https://')) and 'github.com' in source: - # GitHub source - temp_dir = await self._clone_github_repo(source, branch) - source_type = "github" - else: - # Local source - source_path = Path(source).resolve() - if not source_path.exists(): - raise ProjectLoadError(f"Local path does not exist: {source}") - temp_dir = source_path - source_type = "local" - - # Detect languages - languages = detect_project_language(temp_dir) - if not any(lang in self.config.supported_languages for lang in languages): - raise ProjectLoadError(f"Unsupported languages detected: {languages}") - - # Calculate LOC - loc = calculate_loc(temp_dir, languages) - - # Create project info - project_info = ProjectInfo( - id=project_id, - source_type=source_type, - source_path=str(temp_dir), - languages=languages, - size_loc=loc - ) - - self.projects[project_id] = project_info - - self.logger.info(f"Project loaded successfully: {project_id}") - return { - "success": True, - "project_id": project_id, - "project_info": project_info.dict() - } - - except Exception as e: - if temp_dir and source_type == "github": - shutil.rmtree(temp_dir, ignore_errors=True) - raise ProjectLoadError(f"Failed to load project: {str(e)}") - - async def _clone_github_repo(self, url: str, branch: Optional[str] = None) -> Path: - """Clone GitHub repository to temporary directory""" - temp_dir = Path(tempfile.mkdtemp(prefix="joern_project_")) - - try: - clone_kwargs = { - 'depth': 1, # Shallow clone by default - 'single_branch': True - } - - if branch: - clone_kwargs['branch'] = branch - - # Add authentication if available - if self.config.github_token: - parsed = urlparse(url) - auth_url = f"https://{self.config.github_token}@{parsed.netloc}{parsed.path}" - git.Repo.clone_from(auth_url, temp_dir, **clone_kwargs) - else: - git.Repo.clone_from(url, temp_dir, **clone_kwargs) - - return temp_dir - - except GitError as e: - shutil.rmtree(temp_dir, ignore_errors=True) - raise ProjectLoadError(f"Failed to clone repository: {str(e)}") - - async def generate_cpg(self, project_id: str, language: Optional[str] = None) -> Dict[str, Any]: - """Generate Code Property Graph for project""" - if project_id not in self.projects: - raise CPGGenerationError(f"Project not found: {project_id}") - - project = self.projects[project_id] - project.last_accessed = time.time() - - self.logger.info(f"Generating CPG for project: {project_id}") - - # Check cache first - cache_key = f"{project_id}_{language or 'auto'}" - cached_cpg = await self._get_cached_cpg(cache_key) - if cached_cpg: - project.cpg_generated = True - project.cpg_path = cached_cpg - return { - "success": True, - "project_id": project_id, - "cpg_path": cached_cpg, - "from_cache": True - } - - try: - # Determine language - target_language = language or project.languages[0] - if target_language not in self.config.supported_languages: - raise CPGGenerationError(f"Unsupported language: {target_language}") - - # Generate CPG using Docker - cpg_path = await self._run_cpg_generation(project, target_language) - - # Cache the CPG - await self._cache_cpg(cache_key, cpg_path) - - project.cpg_generated = True - project.cpg_path = cpg_path - - return { - "success": True, - "project_id": project_id, - "cpg_path": cpg_path, - "language": target_language, - "from_cache": False - } - - except Exception as e: - raise 
CPGGenerationError(f"CPG generation failed: {str(e)}") - - async def _run_cpg_generation(self, project: ProjectInfo, language: str) -> str: - """Run CPG generation in Docker container""" - await self._ensure_docker_initialized() - - # Create output directory using hash of project path for better caching - path_hash = hashlib.md5(project.source_path.encode()).hexdigest()[:8] - output_dir = self.cache_dir / f"cpg_{path_hash}" - output_dir.mkdir(exist_ok=True) - - try: - # If C or C++ project, prefer the c2cpg frontend which produces a cpg directly - if language in ("c", "cpp"): - self.logger.info("Using c2cpg frontend for C/C++ project") - - container = self.docker_client.containers.run( - self.config.docker.image, - command=f"/opt/joern/joern-cli/c2cpg.sh /app/input -o /app/output/cpg.bin", - volumes={ - str(Path(project.source_path)): {'bind': '/app/input', 'mode': 'ro'}, - str(output_dir): {'bind': '/app/output', 'mode': 'rw'} - }, - working_dir='/app', - environment={ - "JAVA_OPTS": "-Xmx4g", - "PATH": "/opt/joern:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - }, - network_mode=self.config.docker.network_mode, - mem_limit=self.config.docker.memory_limit, - detach=True, - remove=False - ) - - try: - result = container.wait(timeout=self.config.docker.timeout) - logs = container.logs().decode('utf-8') - - if result['StatusCode'] != 0: - raise CPGGenerationError(f"CPG generation failed (code {result['StatusCode']}): {logs}") - - cpg_path = output_dir / "cpg.bin" - if not cpg_path.exists(): - raise CPGGenerationError("CPG file not generated. Logs:\n" + logs) - - self.logger.info(f"Successfully generated CPG at {cpg_path}") - return str(cpg_path) - - except Exception as e: - self.logger.error(f"Error during CPG generation: {str(e)}") - try: - container.kill() - except: - pass - raise CPGGenerationError(f"Failed to generate CPG: {str(e)}") - - # Fallback: try the Joern REPL scripting approach for other languages - else: - # Create the Joern script - script_dir = Path(tempfile.mkdtemp(prefix="joern_script_")) - script_path = script_dir / "query.sc" - - script = """ -workspace.reset -val cpg = importCode("/app/input") -println("CPG generation completed") -System.exit(0) -""" - with open(script_path, 'w') as f: - f.write(script) - - container = self.docker_client.containers.run( - self.config.docker.image, - command="/opt/joern/joern-cli/joern --script /app/script/query.sc", - volumes={ - str(Path(project.source_path)): {'bind': '/app/input', 'mode': 'ro'}, - str(output_dir): {'bind': '/app/output', 'mode': 'rw'}, - str(script_dir): {'bind': '/app/script', 'mode': 'ro'} - }, - working_dir='/app', - environment={ - "JAVA_OPTS": "-Xmx4g", - "PATH": "/opt/joern:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - }, - network_mode=self.config.docker.network_mode, - mem_limit=self.config.docker.memory_limit, - detach=True, - remove=False - ) - - # Wait for completion with timeout and capture logs - try: - result = container.wait(timeout=self.config.docker.timeout) - logs = container.logs().decode('utf-8') - - if result['StatusCode'] != 0: - raise CPGGenerationError(f"CPG generation failed (code {result['StatusCode']}): {logs}") - - # Verify CPG was created - cpg_path = output_dir / "cpg.bin" - if not cpg_path.exists(): - raise CPGGenerationError("CPG file not generated. 
Logs:\n" + logs) - - self.logger.info(f"Successfully generated CPG at {cpg_path}") - return str(cpg_path) - - except Exception as e: - self.logger.error(f"Error during CPG generation: {str(e)}") - try: - container.kill() - except: - pass - raise CPGGenerationError(f"Failed to generate CPG: {str(e)}") - finally: - try: - shutil.rmtree(script_dir) - except: - pass - - except ContainerError as e: - raise CPGGenerationError(f"Docker container error: {str(e)}") - - async def run_query(self, project_id: str, query: str, format: str = "json") -> Dict[str, Any]: - """Execute Joern query against project CPG""" - if project_id not in self.projects: - raise QueryExecutionError(f"Project not found: {project_id}") - - project = self.projects[project_id] - if not project.cpg_generated or not project.cpg_path: - raise QueryExecutionError(f"CPG not generated for project: {project_id}") - - project.last_accessed = time.time() - - self.logger.info(f"Running query on project {project_id}: {query[:100]}...") - - start_time = time.time() - - try: - results = await self._execute_joern_query(project, query, format) - execution_time = time.time() - start_time - - query_result = QueryResult( - query=query, - success=True, - results=results, - execution_time=execution_time - ) - - return query_result.dict() - - except Exception as e: - execution_time = time.time() - start_time - query_result = QueryResult( - query=query, - success=False, - error=str(e), - execution_time=execution_time - ) - return query_result.dict() - - async def _execute_joern_query(self, project: ProjectInfo, query: str, format: str) -> List[Dict[str, Any]]: - """Execute query in Docker container""" - await self._ensure_docker_initialized() - - # Create temporary directory for query execution - temp_dir = Path(tempfile.mkdtemp(prefix="joern_query_")) - - try: - # Sanitize the incoming query: ensure loadCpg(...) 
yields a Cpg (not Option[Cpg]) - cpg_file_path = Path(project.cpg_path) - cpg_filename = cpg_file_path.name - container_cpg_path = f"/app/cpg/{cpg_filename}" - - sanitized_query = query.replace('loadCpg("/app/cpg/cpg.bin")', f'loadCpg("{container_cpg_path}").get') - sanitized_query = sanitized_query.replace('val cpg = loadCpg("/app/cpg/cpg.bin")', f'val cpg = loadCpg("{container_cpg_path}").get') - # Normalize accidental double .get from repeated replacements - sanitized_query = sanitized_query.replace('.get.get', '.get') - - # Use Joern's built-in JSON execution directives for proper serialization - if format == "json": - template = f""" -workspace.reset -val cpg = loadCpg("{container_cpg_path}").get -val result = {{ -%QUERY% -}} - -// Use Joern's toJsonPretty directive for proper serialization -import java.nio.file.Files -import java.nio.file.Paths -val jsonOutput = result.toJsonPretty -Files.write(Paths.get("/app/output/results.json"), jsonOutput.getBytes("utf-8")) -""" - - joern_script = template.replace("%QUERY%", sanitized_query) - else: - # For non-json formats, just print the selected output - template = f""" -workspace.reset -val cpg = loadCpg("{container_cpg_path}").get -val result = {{ -%QUERY% -}} -println(result.toList) -""" - - joern_script = template.replace("%QUERY%", sanitized_query) - - script_path = temp_dir / "query.sc" - with open(script_path, 'w') as f: - f.write(joern_script) - - # Run query in container - cpg_file_path = Path(project.cpg_path) - cpg_dir = cpg_file_path.parent - cpg_filename = cpg_file_path.name - - container = self.docker_client.containers.run( - self.config.docker.image, - command="/opt/joern/joern-cli/joern --script /app/output/query.sc", - volumes={ - str(temp_dir): {'bind': '/app/output', 'mode': 'rw'}, - str(cpg_dir): {'bind': '/app/cpg', 'mode': 'ro'} - }, - working_dir='/app', - environment={ - "JAVA_OPTS": "-Xmx4g", - "PATH": "/opt/joern:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - }, - network_mode=self.config.docker.network_mode, - mem_limit=self.config.docker.memory_limit, - detach=True, - remove=False - ) - - # Wait for completion - try: - result = container.wait(timeout=self.config.docker.timeout) - logs = container.logs().decode('utf-8') - - if result['StatusCode'] != 0: - raise QueryExecutionError(f"Query execution failed: {logs}") - - # Read results from results.json if created by script - results_path = temp_dir / "results.json" - if results_path.exists(): - with open(results_path, 'r') as f: - # Joern's toJsonPretty returns a valid JSON string that we can parse directly - raw = json.loads(f.read()) - return self._normalize_results(raw) - - # If no JSON could be extracted, return empty results - return [] - - except Exception as e: - try: - container.kill() - except: - pass - raise e - - finally: - shutil.rmtree(temp_dir, ignore_errors=True) - - async def list_projects(self) -> Dict[str, Any]: - """List all loaded projects""" - return { - "success": True, - "projects": [proj.dict() for proj in self.projects.values()] - } - - async def project_info(self, project_id: str) -> Dict[str, Any]: - """Get detailed project information""" - if project_id not in self.projects: - return { - "success": False, - "error": f"Project not found: {project_id}" - } - - project = self.projects[project_id] - project.last_accessed = time.time() - - return { - "success": True, - "project": project.dict() - } - - async def list_queries(self, category: str = "all") -> Dict[str, Any]: - """List pre-built queries""" - queries = { - 
"security": { - "sql_injection": "cpg.call.name(\".*exec.*\").code", - "xss_sinks": "cpg.call.name(\".*print.*\").argument", - "hardcoded_secrets": "cpg.literal.code(\".*password.*|.*key.*|.*secret.*\")", - "unsafe_deserialization": "cpg.call.name(\".*deserialize.*|.*pickle.*\")" - }, - "quality": { - "complex_methods": "cpg.method.filter(_.numberOfLines > 20).l", - "long_methods": "cpg.method.filter(_.numberOfLines > 50).l", - "duplicate_code": "cpg.method.l", - "unused_variables": "cpg.identifier.filter(_.refsTo.isEmpty).l" - }, - "metrics": { - "total_methods": "List(cpg.method.size).l", - "total_classes": "List(cpg.typeDecl.size).l", - "total_files": "List(cpg.file.size).l", - "average_complexity": "List(cpg.method.size).l" - }, - "taint_analysis": { - "taint_sources": "cpg.call.name(\".*(input|read|get|recv|scanf|fgets|fread).*\").l", - "taint_sinks": "cpg.call.name(\".*(exec|system|eval|print|printf|write|send).*\").l", - "data_flow_paths": "def source = cpg.call.name(\".*input.*\"); def sink = cpg.call.name(\".*exec.*\"); sink.reachableBy(source).l", - "function_reachability": "def startFunc = cpg.method.name(\".*main.*\"); def targetFunc = cpg.method.name(\".*exec.*\"); targetFunc.reachableBy(startFunc).l", - "call_graph": "cpg.method.name(\".*\").callee.l", - "control_flow": "cpg.method.name(\".*main.*\").controlStructure.l", - "parameter_tainting": "cpg.parameter.reachableBy(cpg.call.name(\".*input.*\")).l", - "return_value_tainting": "cpg.method.filter(method => method.call.name(\".*input.*\").size > 0).l", - "string_concatenation": "cpg.call.name(\".*strcat.*|.*sprintf.*\").l", - "buffer_overflow_risks": "cpg.call.name(\".*strcpy.*|.*memcpy.*|.*gets.*\").l", - "path_traversal": "cpg.literal.code(\".*\\\\.\\\\..*|/.*\").reachableBy(cpg.call.name(\".*open.*|.*read.*\")).l" - }, - "function_search": { - "all_functions": "cpg.method.l", - "functions_by_name": "cpg.method.name(\".*target.*\").l", - "functions_with_parameters": "cpg.method.filter(_.parameter.size > 0).l", - "recursive_functions": "cpg.method.filter(m => m.callOut.name(m.name).size > 0).l", - "extern_functions": "cpg.method.filter(_.name(\".*extern.*\")).l", - "inline_functions": "cpg.method.filter(_.name(\".*inline.*\")).l", - "main_functions": "cpg.method.name(\".*main.*\").l", - "library_functions": "cpg.call.nameNot(\".*\").filter(_.method.isEmpty).l", - "defined_functions": "cpg.method.filter(_.block.nonEmpty).l" - }, - "reachability": { - "method_reachability": "def source = cpg.method.name(\".*source.*\"); def sink = cpg.method.name(\".*sink.*\"); sink.reachableBy(source).l", - "call_reachability": "def sourceCall = cpg.call.name(\".*source.*\"); def sinkCall = cpg.call.name(\".*sink.*\"); sinkCall.reachableBy(sourceCall).l", - "data_reachability": "def source = cpg.identifier.name(\".*input.*\"); def sink = cpg.identifier.name(\".*output.*\"); sink.reachableBy(source).l", - "control_reachability": "cpg.controlStructure.reachableBy(cpg.method.name(\".*main.*\")).l", - "file_reachability": "cpg.method.name(\".*main.*\").file.l", - "type_reachability": "cpg.method.name(\".*main.*\").typeDecl.l" - } - } - - if category == "all": - result_queries = queries - elif category in queries: - result_queries = {category: queries[category]} - else: - return { - "success": False, - "error": f"Unknown category: {category}" - } - - return { - "success": True, - "queries": result_queries - } - - async def cleanup_project(self, project_id: str) -> Dict[str, Any]: - """Clean up project resources""" - if project_id not in 
self.projects: - return { - "success": False, - "error": f"Project not found: {project_id}" - } - - project = self.projects[project_id] - - try: - # Remove temporary files - if project.source_type == "github" and os.path.exists(project.source_path): - shutil.rmtree(project.source_path, ignore_errors=True) - - # Remove CPG files - if project.cpg_path and os.path.exists(project.cpg_path): - cpg_dir = Path(project.cpg_path).parent - shutil.rmtree(cpg_dir, ignore_errors=True) - - # Remove from memory - del self.projects[project_id] - - return { - "success": True, - "message": f"Project {project_id} cleaned up successfully" - } - - except Exception as e: - return { - "success": False, - "error": f"Cleanup failed: {str(e)}" - } - - async def _get_cached_cpg(self, cache_key: str) -> Optional[str]: - """Get CPG from cache if available""" - if not self.config.cache.enabled: - return None - - cache_path = self.cache_dir / f"{cache_key}.bin" - if cache_path.exists(): - # Check if cache is still valid - age = time.time() - cache_path.stat().st_mtime - if age < (self.config.cache.ttl_hours * 3600): - return str(cache_path) - - return None - - async def _cache_cpg(self, cache_key: str, cpg_path: str): - """Cache generated CPG""" - if not self.config.cache.enabled: - return - - cache_path = self.cache_dir / f"{cache_key}.bin" - shutil.copy2(cpg_path, cache_path) - - async def run(self): - """Run the MCP server""" - try: - # Import the MCP server runner - from mcp.server.stdio import stdio_server - - async with stdio_server() as (read_stream, write_stream): - await self.server.run( - read_stream, - write_stream, - self.server.create_initialization_options() - ) - except Exception as e: - self.logger.error(f"Error running MCP server: {str(e)}", exc_info=True) - raise - - def _normalize_results(self, raw): - """Normalize raw JSON results into a list of dictionaries for QueryResult. - - Since we're now using Joern's native toJsonPretty directive, the results - should already be properly serialized. We just need to handle the structure. - """ - if isinstance(raw, list): - # If it's a list of objects with properties, return as-is - if all(isinstance(x, dict) for x in raw): - return raw - # If it's a list of primitives, wrap each as {"value": primitive} - else: - return [{"value": x} for x in raw] - elif isinstance(raw, dict): - return [raw] - else: - # Single primitive value - return [{"value": raw}] \ No newline at end of file diff --git a/test_client.py b/test_client.py deleted file mode 100644 index 8ae0fc6..0000000 --- a/test_client.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple test client for the Joern MCP Server. - -This client demonstrates how to interact with the Joern MCP Server programmatically. 
-""" - -import asyncio -import json -import sys -from pathlib import Path - -from src.server import JoernMCPServer -from src.config import load_config - - -async def test_basic_functionality(): - """Test basic server functionality""" - print("🚀 Starting Joern MCP Server test...") - print("📝 Note: Ensure Docker image is built with: docker build -t joern:latest .") - - # Load configuration - config = load_config() - server = JoernMCPServer(config) - - try: - # Docker will be initialized automatically when needed - print("🚀 Server ready, Docker will initialize on first use") - - # Load sample project - sample_path = Path(__file__).parent / "examples" / "sample.c" - if not sample_path.exists(): - print(f"❌ Sample file not found: {sample_path}") - return - - print(f"📁 Loading project from: {sample_path.parent}") - load_result = await server.load_project(str(sample_path.parent)) - - if not load_result["success"]: - print(f"❌ Failed to load project: {load_result}") - return - - project_id = load_result["project_id"] - print(f"✅ Project loaded with ID: {project_id}") - - # List projects - print("📋 Listing projects...") - projects_result = await server.list_projects() - print(f"Projects: {len(projects_result['projects'])}") - - # Generate CPG - print("🔧 Generating CPG...") - cpg_result = await server.generate_cpg(project_id) - - if not cpg_result["success"]: - print(f"❌ Failed to generate CPG: {cpg_result}") - return - - print(f"✅ CPG generated at: {cpg_result['cpg_path']}") - - # Run a simple query - print("🔍 Running query...") - query = "cpg.method.l" - query_result = await server.run_query(project_id, query) - - if query_result["success"]: - methods_count = len(query_result["results"]) - print(f"✅ Query executed successfully, found {methods_count} methods") - - # Show first few results - if query_result["results"]: - print("📊 Sample results:") - for i, method in enumerate(query_result["results"][:3]): - name = method.get("name", "unknown") - print(f" {i+1}. 
{name}") - else: - print(f"❌ Query failed: {query_result['error']}") - - # Test all predefined queries - print("🧪 Testing all predefined queries...") - - # Get all categories - all_queries_result = await server.list_queries("all") - if all_queries_result["success"]: - categories = all_queries_result["queries"] - test_results = {} - - for category_name, category_queries in categories.items(): - print(f"📊 Testing {len(category_queries)} {category_name} queries:") - - for query_name, query in category_queries.items(): - print(f" 🔍 Testing {query_name}...") - try: - result = await server.run_query(project_id, query) - if result["success"]: - result_count = len(result["results"]) - test_results[f"{category_name}_{query_name}"] = { - "status": "success", - "result_count": result_count, - "execution_time": result["execution_time"] - } - print(f" ✅ {query_name}: {result_count} results ({result['execution_time']:.2f}s)") - else: - test_results[f"{category_name}_{query_name}"] = { - "status": "failed", - "error": result["error"] - } - print(f" ❌ {query_name}: {result['error']}") - except Exception as e: - test_results[f"{category_name}_{query_name}"] = { - "status": "error", - "error": str(e) - } - print(f" 💥 {query_name}: {str(e)}") - - # Output results in JSON format - print("📄 All test results in JSON format:") - print(json.dumps(test_results, indent=2)) - - # Cleanup - print("🧹 Cleaning up...") - cleanup_result = await server.cleanup_project(project_id) - if cleanup_result["success"]: - print("✅ Cleanup completed") - - print("🎉 Test completed successfully!") - - except Exception as e: - print(f"❌ Test failed with error: {e}") - import traceback - traceback.print_exc() - - -if __name__ == "__main__": - asyncio.run(test_basic_functionality()) \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 564b326..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Test configuration and fixtures for Joern MCP Server tests.""" - -import pytest -import tempfile -import shutil -import sys -from pathlib import Path - -# Add src directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from src.models import ServerConfig, DockerConfig, CacheConfig -from src.server import JoernMCPServer - - -@pytest.fixture -def temp_dir(): - """Create a temporary directory for testing""" - temp_path = Path(tempfile.mkdtemp()) - yield temp_path - shutil.rmtree(temp_path, ignore_errors=True) - - -@pytest.fixture -def test_config(temp_dir): - """Create a test configuration""" - return ServerConfig( - docker=DockerConfig( - image="joern:latest", - timeout=60, - memory_limit="2g" - ), - cache=CacheConfig( - enabled=True, - directory=str(temp_dir / "cache"), - ttl_hours=1 - ), - log_level="DEBUG" - ) - - -@pytest.fixture -def server(test_config): - """Create a test server instance""" - return JoernMCPServer(test_config) - - -@pytest.fixture -def sample_c_project(temp_dir): - """Create a sample C project for testing""" - project_dir = temp_dir / "sample_c" - project_dir.mkdir() - - # Create a simple C file - c_file = project_dir / "main.c" - c_file.write_text(""" -#include -#include - -int add(int a, int b) { - return a + b; -} - -int main() { - int x = 5; - int y = 10; - int result = add(x, y); - printf("Result: %d\\n", result); - return 0; -} -""") - - return project_dir - - -@pytest.fixture -def sample_python_project(temp_dir): - """Create a sample Python project for testing""" - project_dir = temp_dir / "sample_python" - 
project_dir.mkdir() - - # Create a simple Python file - py_file = project_dir / "main.py" - py_file.write_text(""" -def add(a, b): - return a + b - -def main(): - x = 5 - y = 10 - result = add(x, y) - print(f"Result: {result}") - -if __name__ == "__main__": - main() -""") - - # Create requirements.txt - req_file = project_dir / "requirements.txt" - req_file.write_text("pytest==7.0.0\n") - - return project_dir \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index 5010e18..0000000 --- a/tests/test_config.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Tests for configuration management.""" - -import pytest -import tempfile -import os -import sys -from pathlib import Path - -# Add src directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from src.config import load_config -from src.models import ServerConfig - - -def test_load_config_from_environment(): - """Test loading configuration from environment variables""" - # Set some environment variables - os.environ["JOERN_DOCKER_IMAGE"] = "test:latest" - os.environ["JOERN_MEMORY_LIMIT"] = "8g" - os.environ["JOERN_LOG_LEVEL"] = "DEBUG" - - try: - config = load_config() - - assert config.docker.image == "test:latest" - assert config.docker.memory_limit == "8g" - assert config.log_level == "DEBUG" - - finally: - # Clean up environment variables - for var in ["JOERN_DOCKER_IMAGE", "JOERN_MEMORY_LIMIT", "JOERN_LOG_LEVEL"]: - os.environ.pop(var, None) - - -def test_load_config_defaults(): - """Test loading configuration with default values""" - # Clear any existing environment variables - env_vars = [ - "JOERN_DOCKER_IMAGE", "JOERN_CPU_LIMIT", "JOERN_MEMORY_LIMIT", - "JOERN_TIMEOUT", "JOERN_CACHE_ENABLED", "JOERN_CACHE_SIZE_GB", - "JOERN_CACHE_DIR", "GITHUB_TOKEN", "JOERN_LOG_LEVEL" - ] - - original_values = {} - for var in env_vars: - original_values[var] = os.environ.pop(var, None) - - try: - config = load_config() - - # Check defaults - assert config.docker.image == "joern:latest" - assert config.docker.memory_limit == "4g" - assert config.docker.timeout == 300 - assert config.cache.enabled is True - assert config.log_level == "INFO" - - finally: - # Restore original environment - for var, value in original_values.items(): - if value is not None: - os.environ[var] = value - - -def test_load_config_from_yaml_file(): - """Test loading configuration from YAML file""" - yaml_content = """ -docker: - image: "custom:latest" - memory_limit: "6g" - timeout: 600 - -cache: - enabled: false - max_size_gb: 5 - -log_level: "ERROR" -""" - - with tempfile.NamedTemporaryFile(mode='w', suffix='.yml', delete=False) as f: - f.write(yaml_content) - f.flush() - - try: - config = load_config(f.name) - - assert config.docker.image == "custom:latest" - assert config.docker.memory_limit == "6g" - assert config.docker.timeout == 600 - assert config.cache.enabled is False - assert config.cache.max_size_gb == 5 - assert config.log_level == "ERROR" - - finally: - os.unlink(f.name) - - -def test_load_config_nonexistent_file(): - """Test loading configuration with non-existent file""" - config = load_config("/nonexistent/config.yml") - - # Should fall back to environment/defaults - assert isinstance(config, ServerConfig) - assert config.docker.image == "joern:latest" # default value \ No newline at end of file diff --git a/tests/test_models.py b/tests/test_models.py deleted file mode 100644 index 01a4703..0000000 --- a/tests/test_models.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Tests for data 
models.""" - -import pytest -import time -import sys -from pathlib import Path - -# Add src directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from src.models import ( - ServerConfig, DockerConfig, CacheConfig, - ProjectInfo, QueryResult, - JoernMCPError, ProjectLoadError -) - - -def test_docker_config_defaults(): - """Test DockerConfig default values""" - config = DockerConfig() - assert config.image == "joern:latest" - assert config.memory_limit == "4g" - assert config.timeout == 300 - assert config.network_mode == "none" - - -def test_cache_config_defaults(): - """Test CacheConfig default values""" - config = CacheConfig() - assert config.enabled is True - assert config.max_size_gb == 10 - assert config.ttl_hours == 24 - assert config.directory == "/tmp/joern_cache" - - -def test_server_config_defaults(): - """Test ServerConfig default values""" - config = ServerConfig() - assert config.max_concurrent_analyses == 3 - assert "c" in config.supported_languages - assert "python" in config.supported_languages - assert config.log_level == "INFO" - - -def test_project_info_creation(): - """Test ProjectInfo model creation""" - project = ProjectInfo( - id="test123", - source_type="github", - source_path="/tmp/test", - languages=["c", "cpp"], - size_loc=100 - ) - - assert project.id == "test123" - assert project.source_type == "github" - assert project.languages == ["c", "cpp"] - assert project.size_loc == 100 - assert project.cpg_generated is False - assert project.cpg_path is None - assert project.created_at > 0 - assert project.last_accessed > 0 - - -def test_project_info_timestamps(): - """Test that timestamps are properly set""" - before = time.time() - project = ProjectInfo( - id="test123", - source_type="local", - source_path="/tmp/test" - ) - after = time.time() - - assert before <= project.created_at <= after - assert before <= project.last_accessed <= after - - -def test_query_result_success(): - """Test successful QueryResult creation""" - result = QueryResult( - query="cpg.method.l", - success=True, - results=[{"name": "main", "id": 123}], - execution_time=1.5 - ) - - assert result.success is True - assert result.error is None - assert len(result.results) == 1 - assert result.execution_time == 1.5 - assert result.timestamp > 0 - - -def test_query_result_error(): - """Test error QueryResult creation""" - result = QueryResult( - query="invalid.query", - success=False, - error="Invalid query syntax", - execution_time=0.1 - ) - - assert result.success is False - assert result.error == "Invalid query syntax" - assert result.results == [] - - -def test_joern_mcp_error(): - """Test base exception class""" - error = JoernMCPError("Test error") - assert str(error) == "Test error" - - -def test_project_load_error(): - """Test ProjectLoadError inheritance""" - error = ProjectLoadError("Failed to load project") - assert isinstance(error, JoernMCPError) - assert str(error) == "Failed to load project" \ No newline at end of file diff --git a/tests/test_server.py b/tests/test_server.py deleted file mode 100644 index d6fcca9..0000000 --- a/tests/test_server.py +++ /dev/null @@ -1,220 +0,0 @@ -"""Integration tests for the Joern MCP Server.""" - -import pytest -import asyncio -import json -import sys -from pathlib import Path -from unittest.mock import Mock, patch - -# Add src directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from src.server import JoernMCPServer -from src.models import ProjectLoadError, CPGGenerationError - - 
-class TestJoernMCPServer: - """Test cases for JoernMCPServer""" - - def test_server_initialization(self, test_config): - """Test server initialization with config""" - server = JoernMCPServer(test_config) - assert server.config == test_config - assert server.projects == {} - assert server.logger is not None - assert server.cache_dir.exists() - - def test_generate_project_id(self, server): - """Test project ID generation""" - source1 = "https://github.com/user/repo1" - source2 = "https://github.com/user/repo2" - - id1 = server._generate_project_id(source1) - id2 = server._generate_project_id(source2) - - assert len(id1) == 8 - assert len(id2) == 8 - assert id1 != id2 - - # Same source should produce same ID - id1_repeat = server._generate_project_id(source1) - assert id1 == id1_repeat - - @pytest.mark.asyncio - async def test_load_local_project(self, server, sample_c_project): - """Test loading a local project""" - result = await server.load_project(str(sample_c_project)) - - assert result["success"] is True - assert "project_id" in result - assert "project_info" in result - - project_id = result["project_id"] - assert project_id in server.projects - - project = server.projects[project_id] - assert project.source_type == "local" - assert "c" in project.languages - assert project.size_loc > 0 - - @pytest.mark.asyncio - async def test_load_nonexistent_local_project(self, server): - """Test loading a non-existent local project""" - with pytest.raises(ProjectLoadError): - await server.load_project("/nonexistent/path") - - @pytest.mark.asyncio - async def test_list_projects_empty(self, server): - """Test listing projects when none are loaded""" - result = await server.list_projects() - - assert result["success"] is True - assert result["projects"] == [] - - @pytest.mark.asyncio - async def test_list_projects_with_projects(self, server, sample_c_project): - """Test listing projects when some are loaded""" - # Load a project first - await server.load_project(str(sample_c_project)) - - result = await server.list_projects() - - assert result["success"] is True - assert len(result["projects"]) == 1 - assert result["projects"][0]["source_type"] == "local" - - @pytest.mark.asyncio - async def test_project_info_existing(self, server, sample_c_project): - """Test getting info for existing project""" - # Load a project first - load_result = await server.load_project(str(sample_c_project)) - project_id = load_result["project_id"] - - result = await server.project_info(project_id) - - assert result["success"] is True - assert result["project"]["id"] == project_id - assert result["project"]["source_type"] == "local" - - @pytest.mark.asyncio - async def test_project_info_nonexistent(self, server): - """Test getting info for non-existent project""" - result = await server.project_info("nonexistent") - - assert result["success"] is False - assert "error" in result - - @pytest.mark.asyncio - async def test_list_queries_all(self, server): - """Test listing all available queries""" - result = await server.list_queries("all") - - assert result["success"] is True - assert "queries" in result - assert "security" in result["queries"] - assert "quality" in result["queries"] - assert "metrics" in result["queries"] - - @pytest.mark.asyncio - async def test_list_queries_category(self, server): - """Test listing queries by category""" - result = await server.list_queries("security") - - assert result["success"] is True - assert "queries" in result - assert "security" in result["queries"] - assert "quality" not in 
result["queries"] - - @pytest.mark.asyncio - async def test_list_queries_invalid_category(self, server): - """Test listing queries with invalid category""" - result = await server.list_queries("invalid") - - assert result["success"] is False - assert "error" in result - - @pytest.mark.asyncio - async def test_cleanup_project_existing(self, server, sample_c_project): - """Test cleaning up an existing project""" - # Load a project first - load_result = await server.load_project(str(sample_c_project)) - project_id = load_result["project_id"] - - # Verify project exists - assert project_id in server.projects - - # Cleanup - result = await server.cleanup_project(project_id) - - assert result["success"] is True - assert project_id not in server.projects - - @pytest.mark.asyncio - async def test_cleanup_project_nonexistent(self, server): - """Test cleaning up a non-existent project""" - result = await server.cleanup_project("nonexistent") - - assert result["success"] is False - assert "error" in result - - def test_normalize_results_list_of_dicts(self, server): - """Test normalizing list of dictionaries""" - raw = [{"name": "func1", "id": 1}, {"name": "func2", "id": 2}] - result = server._normalize_results(raw) - - assert result == raw - - def test_normalize_results_list_of_primitives(self, server): - """Test normalizing list of primitives""" - raw = ["func1", "func2", "func3"] - result = server._normalize_results(raw) - - expected = [{"value": "func1"}, {"value": "func2"}, {"value": "func3"}] - assert result == expected - - def test_normalize_results_single_dict(self, server): - """Test normalizing single dictionary""" - raw = {"name": "func1", "id": 1} - result = server._normalize_results(raw) - - assert result == [raw] - - def test_normalize_results_single_primitive(self, server): - """Test normalizing single primitive value""" - raw = "single_value" - result = server._normalize_results(raw) - - assert result == [{"value": "single_value"}] - - -class TestDockerIntegration: - """Test cases that require Docker (should be marked for optional execution)""" - - @pytest.mark.integration - @pytest.mark.asyncio - async def test_docker_initialization(self, server): - """Test Docker client initialization""" - # This test requires Docker to be running - await server.initialize_docker() - assert server.docker_client is not None - - @pytest.mark.integration - @pytest.mark.asyncio - async def test_cpg_generation_c_project(self, server, sample_c_project): - """Test CPG generation for C project""" - # Load project first - load_result = await server.load_project(str(sample_c_project)) - project_id = load_result["project_id"] - - # Generate CPG - result = await server.generate_cpg(project_id) - - assert result["success"] is True - assert result["project_id"] == project_id - assert "cpg_path" in result - - # Verify project state - project = server.projects[project_id] - assert project.cpg_generated is True - assert project.cpg_path is not None \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index 38dfc79..0000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,62 +0,0 @@ -"""Tests for utility functions.""" - -import pytest -import sys -from pathlib import Path - -# Add src directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from src.utils import detect_project_language, calculate_loc - - -def test_detect_c_language(sample_c_project): - """Test detection of C language""" - languages = 
detect_project_language(sample_c_project) - assert "c" in languages - - -def test_detect_python_language(sample_python_project): - """Test detection of Python language""" - languages = detect_project_language(sample_python_project) - assert "python" in languages - - -def test_detect_unknown_language(temp_dir): - """Test detection when no known languages are present""" - # Create a file with unknown extension - unknown_file = temp_dir / "test.xyz" - unknown_file.write_text("some content") - - languages = detect_project_language(temp_dir) - assert languages == ["unknown"] - - -def test_calculate_loc_c_project(sample_c_project): - """Test LOC calculation for C project""" - languages = ["c"] - loc = calculate_loc(sample_c_project, languages) - assert loc > 0 # Should count non-empty lines - assert loc < 20 # Simple project should have reasonable LOC - - -def test_calculate_loc_python_project(sample_python_project): - """Test LOC calculation for Python project""" - languages = ["python"] - loc = calculate_loc(sample_python_project, languages) - assert loc > 0 - assert loc < 15 - - -def test_calculate_loc_empty_project(temp_dir): - """Test LOC calculation for empty project""" - languages = ["c"] - loc = calculate_loc(temp_dir, languages) - assert loc == 0 - - -def test_calculate_loc_unsupported_language(sample_c_project): - """Test LOC calculation with unsupported language""" - languages = ["unsupported"] - loc = calculate_loc(sample_c_project, languages) - assert loc == 0 # Should not count files for unsupported languages \ No newline at end of file diff --git a/validate.py b/validate.py deleted file mode 100755 index 96cfeec..0000000 --- a/validate.py +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env python3 -""" -Validation script for Joern MCP Server setup. - -This script checks that all prerequisites are properly configured. 
-""" - -import subprocess -import sys -from pathlib import Path - -def check_python(): - """Check Python version""" - version = sys.version_info - if version.major >= 3 and version.minor >= 8: - print(f"✅ Python {version.major}.{version.minor}.{version.micro}") - return True - else: - print(f"❌ Python {version.major}.{version.minor}.{version.micro} (requires 3.8+)") - return False - -def check_docker(): - """Check Docker availability""" - try: - result = subprocess.run(['docker', '--version'], - capture_output=True, text=True, timeout=10) - if result.returncode == 0: - print(f"✅ Docker: {result.stdout.strip()}") - return True - else: - print("❌ Docker not found or not working") - return False - except (subprocess.TimeoutExpired, FileNotFoundError): - print("❌ Docker not found or not responding") - return False - -def check_docker_running(): - """Check if Docker daemon is running""" - try: - result = subprocess.run(['docker', 'info'], - capture_output=True, text=True, timeout=10) - if result.returncode == 0: - print("✅ Docker daemon is running") - return True - else: - print("❌ Docker daemon is not running") - return False - except subprocess.TimeoutExpired: - print("❌ Docker daemon not responding") - return False - -def check_joern_image(): - """Check if Joern Docker image exists""" - try: - result = subprocess.run(['docker', 'images', 'joern:latest', '--format', '{{.Repository}}:{{.Tag}}'], - capture_output=True, text=True, timeout=10) - if result.returncode == 0 and 'joern:latest' in result.stdout: - print("✅ Joern Docker image (joern:latest) found") - return True - else: - print("❌ Joern Docker image not found") - print(" Run: ./build.sh or docker build -t joern:latest .") - return False - except subprocess.TimeoutExpired: - print("❌ Docker not responding when checking images") - return False - -def check_dependencies(): - """Check if Python dependencies are available""" - required_modules = ['docker', 'git', 'pydantic', 'mcp', 'yaml'] - missing = [] - - for module in required_modules: - try: - __import__(module) - except ImportError: - missing.append(module) - - if not missing: - print("✅ All Python dependencies available") - return True - else: - print(f"❌ Missing Python dependencies: {', '.join(missing)}") - print(" Run: pip install -r requirements.txt") - return False - -def check_project_structure(): - """Check if project structure is correct""" - required_files = [ - 'main.py', - 'src/__init__.py', - 'src/server.py', - 'src/models.py', - 'src/config.py', - 'src/utils.py', - 'requirements.txt', - 'Dockerfile', - 'build.sh' - ] - - missing = [] - for file_path in required_files: - if not Path(file_path).exists(): - missing.append(file_path) - - if not missing: - print("✅ Project structure is complete") - return True - else: - print(f"❌ Missing files: {', '.join(missing)}") - return False - -def main(): - """Run all validation checks""" - print("🔍 Validating Joern MCP Server setup...\n") - - checks = [ - ("Python version", check_python), - ("Docker availability", check_docker), - ("Docker daemon", check_docker_running), - ("Python dependencies", check_dependencies), - ("Project structure", check_project_structure), - ("Joern Docker image", check_joern_image), - ] - - passed = 0 - total = len(checks) - - for name, check_func in checks: - print(f"Checking {name}...") - if check_func(): - passed += 1 - print() - - print(f"📊 Validation Results: {passed}/{total} checks passed") - - if passed == total: - print("🎉 All checks passed! 
Joern MCP Server is ready to run.") - print(" Start with: python main.py") - return 0 - else: - print("❌ Some checks failed. Please fix the issues above.") - return 1 - -if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file From c51f19934a0e1baa8eece0126791e12edb15a1d1 Mon Sep 17 00:00:00 2001 From: Ahmed Lekssays Date: Tue, 7 Oct 2025 19:55:07 +0300 Subject: [PATCH 2/3] major code refactoring --- .env.example | 38 + Dockerfile.joern | 41 + cleanup.py | 397 ++++++++ config.example.yaml | 41 + playground/README.md | 72 ++ playground/codebases/sample/sample.c | 120 +++ playground/codebases/sample/sample_asan | Bin 0 -> 29656 bytes sample_client.py | 257 +++++ setup.sh | 77 ++ src/exceptions.py | 48 + src/services/__init__.py | 16 + src/services/cpg_generator.py | 313 ++++++ src/services/docker_orchestrator.py | 108 +++ src/services/git_manager.py | 131 +++ src/services/query_executor.py | 560 +++++++++++ src/services/session_manager.py | 212 +++++ src/tools/__init__.py | 6 + src/tools/mcp_tools.py | 1152 +++++++++++++++++++++++ src/utils/__init__.py | 31 + src/utils/logging.py | 41 + src/utils/redis_client.py | 109 +++ src/utils/validators.py | 113 +++ 22 files changed, 3883 insertions(+) create mode 100644 .env.example create mode 100644 Dockerfile.joern create mode 100755 cleanup.py create mode 100644 config.example.yaml create mode 100644 playground/README.md create mode 100644 playground/codebases/sample/sample.c create mode 100755 playground/codebases/sample/sample_asan create mode 100644 sample_client.py create mode 100755 setup.sh create mode 100644 src/exceptions.py create mode 100644 src/services/__init__.py create mode 100644 src/services/cpg_generator.py create mode 100644 src/services/docker_orchestrator.py create mode 100644 src/services/git_manager.py create mode 100644 src/services/query_executor.py create mode 100644 src/services/session_manager.py create mode 100644 src/tools/__init__.py create mode 100644 src/tools/mcp_tools.py create mode 100644 src/utils/__init__.py create mode 100644 src/utils/logging.py create mode 100644 src/utils/redis_client.py create mode 100644 src/utils/validators.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b63fd52 --- /dev/null +++ b/.env.example @@ -0,0 +1,38 @@ +# Server Configuration +MCP_HOST=0.0.0.0 +MCP_PORT=4242 +MCP_LOG_LEVEL=INFO + +# Redis Configuration +REDIS_HOST=localhost +REDIS_PORT=6379 +REDIS_PASSWORD= +REDIS_DB=0 + +# Docker Configuration +DOCKER_HOST=unix:///var/run/docker.sock +JOERN_IMAGE=joernio/joern:latest +JOERN_MEMORY_LIMIT=4g +JOERN_CPU_LIMIT=2 + +# Session Configuration +SESSION_TTL=3600 +SESSION_IDLE_TIMEOUT=1800 +MAX_CONCURRENT_SESSIONS=10 +PREWARM_POOL_SIZE=2 + +# CPG Generation +CPG_GENERATION_TIMEOUT=600 +MAX_REPO_SIZE_MB=500 + +# Query Execution +QUERY_TIMEOUT=30 +QUERY_CACHE_ENABLED=true +QUERY_CACHE_TTL=300 + +# Storage +WORKSPACE_ROOT=/tmp/joern-mcp +CLEANUP_ON_SHUTDOWN=true + +# GitHub (optional) +GITHUB_TOKEN= diff --git a/Dockerfile.joern b/Dockerfile.joern new file mode 100644 index 0000000..d5b81d9 --- /dev/null +++ b/Dockerfile.joern @@ -0,0 +1,41 @@ +# Dockerfile for Joern - Code Property Graph Analysis Tool +# This builds a local Joern image with all necessary tools for C/C++ analysis + +FROM eclipse-temurin:21-jdk-jammy + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + curl \ + git \ + wget \ + unzip \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Set Joern version +ENV JOERN_VERSION=4.0.429 +ENV 
JOERN_HOME=/opt/joern + +# Download and install Joern from joernio/joern GitHub releases +RUN mkdir -p ${JOERN_HOME} && \ + cd /tmp && \ + wget -q https://github.com/joernio/joern/releases/download/v${JOERN_VERSION}/joern-install.sh && \ + chmod +x joern-install.sh && \ + sed -i 's/sudo //g' joern-install.sh && \ + ./joern-install.sh && \ + rm -rf joern-install.sh + +# Add Joern CLI tools to PATH +ENV PATH="${JOERN_HOME}/joern-cli:${JOERN_HOME}/joern-cli/bin:${PATH}" + +# Create workspace directory +RUN mkdir -p /workspace /playground + +# Set working directory +WORKDIR /workspace + +# Verify installation - check both joern and c2cpg +RUN joern --help && /opt/joern/joern-cli/c2cpg --help + +# Default command - keep container running for interactive use +CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/cleanup.py b/cleanup.py new file mode 100755 index 0000000..85dea5e --- /dev/null +++ b/cleanup.py @@ -0,0 +1,397 @@ +#!/usr/bin/env python3 +""" +Cleanup script for Joern MCP Server + +This script helps clean up various resources: +- Redis data (sessions, query cache) +- Session files and directories +- CPG files (with optional flag) +- Docker containers + +Usage: + python cleanup.py --all # Clean everything except CPGs + python cleanup.py --redis # Clean only Redis data + python cleanup.py --sessions # Clean only session files + python cleanup.py --cpgs # Clean only CPG files + python cleanup.py --docker # Clean only Docker containers + python cleanup.py --redis --sessions # Clean Redis and sessions + python cleanup.py --all --include-cpgs # Clean everything including CPGs +""" + +import argparse +import asyncio +import logging +import os +import shutil +import sys +from pathlib import Path +from typing import List + +# Add src to path so we can import modules +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) + +try: + import docker + DOCKER_AVAILABLE = True +except ImportError: + DOCKER_AVAILABLE = False + +try: + import redis.asyncio as redis + REDIS_AVAILABLE = True +except ImportError: + REDIS_AVAILABLE = False + +from src.config import load_config +from src.utils.logging import setup_logging + +logger = logging.getLogger(__name__) + + +class JoernMCPCleaner: + """Cleanup utility for Joern MCP Server resources""" + + def __init__(self, config_path: str = "config.yaml"): + self.config = load_config(config_path) + self.redis_client = None + self.docker_client = None + + async def initialize(self): + """Initialize clients""" + # Initialize Redis client + if REDIS_AVAILABLE: + try: + self.redis_client = redis.Redis( + host=self.config.redis.host, + port=self.config.redis.port, + password=self.config.redis.password, + db=self.config.redis.db, + decode_responses=self.config.redis.decode_responses + ) + await self.redis_client.ping() + logger.info("Redis client connected") + except Exception as e: + logger.warning(f"Could not connect to Redis: {e}") + self.redis_client = None + + # Initialize Docker client + if DOCKER_AVAILABLE: + try: + self.docker_client = docker.from_env() + self.docker_client.ping() + logger.info("Docker client connected") + except Exception as e: + logger.warning(f"Could not connect to Docker: {e}") + self.docker_client = None + + async def cleanup_redis(self) -> bool: + """Clean up Redis data""" + if not self.redis_client: + logger.error("Redis client not available") + return False + + try: + logger.info("🧹 Cleaning up Redis data...") + + # Get all keys + all_keys = await self.redis_client.keys("*") + + if not all_keys: + 
logger.info(" No Redis keys found") + return True + + # Delete keys by pattern + patterns = [ + "session:*", # Session data + "sessions:*", # Session sets (like sessions:active) + "query:*", # Query cache + "container:*", # Container mappings + "joern:*" # Any joern-specific data + ] + + deleted_count = 0 + for pattern in patterns: + keys = await self.redis_client.keys(pattern) + if keys: + deleted = await self.redis_client.delete(*keys) + deleted_count += deleted + logger.info(f" Deleted {deleted} keys matching '{pattern}'") + + logger.info(f" ✅ Deleted {deleted_count} Redis keys total") + return True + + except Exception as e: + logger.error(f" ❌ Failed to cleanup Redis: {e}") + return False + + async def cleanup_sessions(self) -> bool: + """Clean up session files and directories""" + try: + logger.info("🧹 Cleaning up session files...") + + workspace_root = Path(self.config.storage.workspace_root) + + if not workspace_root.exists(): + logger.info(" No workspace directory found") + return True + + deleted_dirs = 0 + deleted_files = 0 + + # Clean up workspace directories + if workspace_root.exists(): + for item in workspace_root.iterdir(): + if item.is_dir(): + try: + shutil.rmtree(item) + deleted_dirs += 1 + logger.info(f" Deleted directory: {item.name}") + except Exception as e: + logger.error(f" Failed to delete {item}: {e}") + elif item.is_file(): + try: + item.unlink() + deleted_files += 1 + logger.info(f" Deleted file: {item.name}") + except Exception as e: + logger.error(f" Failed to delete {item}: {e}") + + # Clean up playground session directories + playground_path = Path("playground/codebases") + if playground_path.exists(): + for item in playground_path.iterdir(): + # Skip the sample directory + if item.name == "sample": + continue + + if item.is_dir(): + try: + shutil.rmtree(item) + deleted_dirs += 1 + logger.info(f" Deleted playground directory: {item.name}") + except Exception as e: + logger.error(f" Failed to delete playground {item}: {e}") + + logger.info(f" ✅ Deleted {deleted_dirs} directories and {deleted_files} files") + return True + + except Exception as e: + logger.error(f" ❌ Failed to cleanup sessions: {e}") + return False + + async def cleanup_cpgs(self) -> bool: + """Clean up CPG files""" + try: + logger.info("🧹 Cleaning up CPG files...") + + # Clean CPGs from playground + playground_cpgs = Path("playground/cpgs") + deleted_count = 0 + + if playground_cpgs.exists(): + for cpg_file in playground_cpgs.glob("*.bin"): + try: + file_size = cpg_file.stat().st_size / (1024 * 1024) # MB + cpg_file.unlink() + deleted_count += 1 + logger.info(f" Deleted CPG: {cpg_file.name} ({file_size:.2f} MB)") + except Exception as e: + logger.error(f" Failed to delete {cpg_file}: {e}") + + # Clean CPGs from workspace + workspace_root = Path(self.config.storage.workspace_root) + if workspace_root.exists(): + for cpg_file in workspace_root.rglob("*.bin"): + try: + file_size = cpg_file.stat().st_size / (1024 * 1024) # MB + cpg_file.unlink() + deleted_count += 1 + logger.info(f" Deleted workspace CPG: {cpg_file.name} ({file_size:.2f} MB)") + except Exception as e: + logger.error(f" Failed to delete {cpg_file}: {e}") + + logger.info(f" ✅ Deleted {deleted_count} CPG files") + return True + + except Exception as e: + logger.error(f" ❌ Failed to cleanup CPGs: {e}") + return False + + async def cleanup_docker(self) -> bool: + """Clean up Docker containers""" + if not self.docker_client: + logger.error("Docker client not available") + return False + + try: + logger.info("🧹 Cleaning up Docker 
containers...") + + # Find all joern-session containers + containers = self.docker_client.containers.list( + all=True, # Include stopped containers + filters={"name": "joern-session-"} + ) + + if not containers: + logger.info(" No Joern session containers found") + return True + + cleaned_count = 0 + for container in containers: + try: + # Stop if running + if container.status == "running": + container.stop(timeout=5) + logger.info(f" Stopped container: {container.name}") + + # Remove container + container.remove() + cleaned_count += 1 + logger.info(f" Removed container: {container.name}") + + except Exception as e: + logger.error(f" Failed to cleanup container {container.name}: {e}") + + logger.info(f" ✅ Cleaned up {cleaned_count} Docker containers") + return True + + except Exception as e: + logger.error(f" ❌ Failed to cleanup Docker: {e}") + return False + + async def cleanup_all(self, include_cpgs: bool = False) -> bool: + """Clean up all resources""" + logger.info("🧹 Starting full cleanup...") + + results = [] + + # Clean Redis + results.append(await self.cleanup_redis()) + + # Clean sessions + results.append(await self.cleanup_sessions()) + + # Clean CPGs if requested + if include_cpgs: + results.append(await self.cleanup_cpgs()) + + # Clean Docker + results.append(await self.cleanup_docker()) + + success = all(results) + + if success: + logger.info("🎉 Full cleanup completed successfully!") + else: + logger.warning("⚠️ Some cleanup operations failed") + + return success + + async def close(self): + """Close connections""" + if self.redis_client: + await self.redis_client.aclose() + + if self.docker_client: + self.docker_client.close() + + +async def main(): + """Main cleanup function""" + parser = argparse.ArgumentParser( + description="Cleanup Joern MCP Server resources", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python cleanup.py --all # Clean everything except CPGs + python cleanup.py --redis --sessions # Clean Redis and sessions only + python cleanup.py --all --include-cpgs # Clean everything including CPGs + python cleanup.py --cpgs # Clean only CPG files + python cleanup.py --docker # Clean only Docker containers + """ + ) + + parser.add_argument("--redis", action="store_true", help="Clean Redis data") + parser.add_argument("--sessions", action="store_true", help="Clean session files") + parser.add_argument("--cpgs", action="store_true", help="Clean CPG files") + parser.add_argument("--docker", action="store_true", help="Clean Docker containers") + parser.add_argument("--all", action="store_true", help="Clean all resources (except CPGs unless --include-cpgs)") + parser.add_argument("--include-cpgs", action="store_true", help="Include CPG files when using --all") + parser.add_argument("--config", default="config.yaml", help="Config file path") + parser.add_argument("--verbose", "-v", action="store_true", help="Verbose logging") + parser.add_argument("--dry-run", action="store_true", help="Show what would be cleaned without doing it") + + args = parser.parse_args() + + # Setup logging + log_level = "DEBUG" if args.verbose else "INFO" + setup_logging(log_level) + + # Check if any cleanup option is specified + if not any([args.redis, args.sessions, args.cpgs, args.docker, args.all]): + parser.print_help() + print("\nError: At least one cleanup option must be specified") + sys.exit(1) + + if args.dry_run: + logger.info("🔍 DRY RUN MODE - No changes will be made") + # In a real implementation, you'd add dry-run logic + logger.warning("Dry-run 
mode not fully implemented yet") + return + + # Confirm destructive operations + if args.all or args.cpgs: + print("\n⚠️ WARNING: This will permanently delete data!") + if args.all: + print(" - Redis data (sessions, query cache)") + print(" - Session files and directories") + print(" - Docker containers") + if args.cpgs or (args.all and args.include_cpgs): + print(" - CPG files (can be large and take time to regenerate)") + + confirm = input("\nContinue? [y/N]: ") + if confirm.lower() != 'y': + print("Cleanup cancelled") + return + + try: + # Initialize cleaner + cleaner = JoernMCPCleaner(args.config) + await cleaner.initialize() + + success = True + + # Perform requested cleanups + if args.all: + success = await cleaner.cleanup_all(include_cpgs=args.include_cpgs) + else: + if args.redis: + success &= await cleaner.cleanup_redis() + + if args.sessions: + success &= await cleaner.cleanup_sessions() + + if args.cpgs: + success &= await cleaner.cleanup_cpgs() + + if args.docker: + success &= await cleaner.cleanup_docker() + + await cleaner.close() + + if success: + logger.info("✅ Cleanup completed successfully") + else: + logger.error("❌ Some cleanup operations failed") + sys.exit(1) + + except KeyboardInterrupt: + logger.info("\n🛑 Cleanup interrupted by user") + sys.exit(130) + except Exception as e: + logger.error(f"❌ Cleanup failed: {e}") + sys.exit(1) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/config.example.yaml b/config.example.yaml new file mode 100644 index 0000000..581b545 --- /dev/null +++ b/config.example.yaml @@ -0,0 +1,41 @@ +server: + host: ${MCP_HOST:0.0.0.0} + port: ${MCP_PORT:4242} + log_level: ${MCP_LOG_LEVEL:INFO} + +redis: + host: ${REDIS_HOST:localhost} + port: ${REDIS_PORT:6379} + password: ${REDIS_PASSWORD:} + db: ${REDIS_DB:0} + +joern: + binary_path: ${JOERN_BINARY_PATH:joern} + memory_limit: ${JOERN_MEMORY_LIMIT:4g} + +sessions: + ttl: ${SESSION_TTL:3600} + idle_timeout: ${SESSION_IDLE_TIMEOUT:1800} + max_concurrent: ${MAX_CONCURRENT_SESSIONS:50} + +cpg: + generation_timeout: ${CPG_GENERATION_TIMEOUT:600} + max_repo_size_mb: ${MAX_REPO_SIZE_MB:500} + supported_languages: + - java + - c + - cpp + - javascript + - python + - go + - kotlin + - swift + +query: + timeout: ${QUERY_TIMEOUT:30} + cache_enabled: ${QUERY_CACHE_ENABLED:true} + cache_ttl: ${QUERY_CACHE_TTL:300} + +storage: + workspace_root: ${WORKSPACE_ROOT:/tmp/joern-mcp} + cleanup_on_shutdown: ${CLEANUP_ON_SHUTDOWN:true} \ No newline at end of file diff --git a/playground/README.md b/playground/README.md new file mode 100644 index 0000000..682bbc3 --- /dev/null +++ b/playground/README.md @@ -0,0 +1,72 @@ +# Joern MCP Playground + +This directory is mounted to Joern Docker containers at `/playground`, providing a shared workspace for codebases and CPGs. + +## Directory Structure + +``` +playground/ +├── codebases/ # Source code to analyze +│ └── sample/ # Example C codebase +│ └── sample.c +└── cpgs/ # Generated Code Property Graphs (optional) +``` + +## Usage + +### For Local Codebases + +When creating a CPG session with `source_type="local"`, provide paths relative to or within `playground/codebases/`: + +```python +# Example: Analyze the sample codebase +create_cpg_session( + source_type="local", + source_path="playground/codebases/sample", # Or absolute path + language="c" +) +``` + +The Joern container will access this at `/playground/codebases/sample`. 
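+
+Once the session reports `ready` (you can poll `get_session_status`), queries can be run against it. Below is a minimal sketch in the same call style as the example above; the `run_cpgql_query` tool and its `session_id`, `query`, and `timeout` parameters are the ones exercised in `sample_client.py`, while capturing the result of `create_cpg_session` into a `session` variable is assumed here purely for illustration:
+
+```python
+# Assumes `session` holds the result of the create_cpg_session call above
+run_cpgql_query(
+    session_id=session["session_id"],
+    query="cpg.method.name",  # list method names found in the sample codebase
+    timeout=30
+)
+```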
+
+### For GitHub Repositories
+
+When using `source_type="github"`, repositories are automatically cloned into `playground/codebases/{session_id}/`:
+
+```python
+create_cpg_session(
+    source_type="github",
+    source_path="https://github.com/user/repo",
+    language="java"
+)
+```
+
+### CPG Storage
+
+CPGs are stored in the session workspace (`/tmp/joern-mcp/repos/{session_id}/cpg.bin`), but you can optionally store them in `playground/cpgs/` for persistence.
+
+## Benefits
+
+1. **Shared Access**: All Joern containers can access the same codebases
+2. **Persistence**: Codebases survive container restarts
+3. **Easy Management**: Add/remove codebases without rebuilding containers
+4. **Testing**: Perfect for testing with multiple sample projects
+
+## Adding New Codebases
+
+Simply create a new directory under `playground/codebases/`:
+
+```bash
+mkdir -p playground/codebases/my-project
+cp -r /path/to/source/* playground/codebases/my-project/
+```
+
+Then analyze it:
+
+```python
+create_cpg_session(
+    source_type="local",
+    source_path="playground/codebases/my-project",
+    language="java"
+)
+```
diff --git a/playground/codebases/sample/sample.c b/playground/codebases/sample/sample.c
new file mode 100644
index 0000000..4040937
--- /dev/null
+++ b/playground/codebases/sample/sample.c
@@ -0,0 +1,120 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <time.h>
+
+#define TICK_NSEC 1000000
+#define NSEC_PER_SEC 1000000000
+
+struct buffer {
+    char *data;
+    size_t size;
+    size_t capacity;
+};
+
+int initialize_buffer(struct buffer *buf, size_t initial_capacity) {
+    buf->data = (char *)malloc(initial_capacity);
+    if (!buf->data) return -1;
+    buf->size = 0;
+    buf->capacity = initial_capacity;
+    return 0;
+}
+
+void cleanup_buffer(struct buffer *buf) {
+    if (buf->data) {
+        free(buf->data);
+        buf->data = NULL;
+    }
+    buf->size = 0;
+    buf->capacity = 0;
+}
+
+int resize_buffer(struct buffer *buf, size_t new_capacity) {
+    char *new_data = (char *)realloc(buf->data, new_capacity);
+    if (!new_data) return -1;
+    buf->data = new_data;
+    buf->capacity = new_capacity;
+    return 0;
+}
+
+int append_to_buffer(struct buffer *buf, const char *data, size_t len) {
+    if (buf->size + len > buf->capacity) {
+        size_t new_capacity = (buf->capacity == 0) ? 
1024 : buf->capacity * 2; + while (new_capacity < buf->size + len) { + new_capacity *= 2; + } + if (resize_buffer(buf, new_capacity) != 0) { + return -1; + } + } + memcpy(buf->data + buf->size, data, len); + buf->size += len; + return 0; +} + +int safe_div_u64_rem(uint64_t dividend, uint64_t divisor, uint32_t *remainder) { + if (divisor == 0) { + return -1; + } + *remainder = (uint32_t)(dividend % divisor); + return 0; +} + +void jiffies_to_timespec(const unsigned long jiffies, struct timespec *value) { + uint32_t rem; + uint64_t result = (uint64_t)jiffies * TICK_NSEC; + if (safe_div_u64_rem(result, NSEC_PER_SEC, &rem) != 0) { + value->tv_sec = 0; + value->tv_nsec = 0; + return; + } + value->tv_sec = (int64_t)(result / NSEC_PER_SEC); + value->tv_nsec = (int64_t)rem; +} + +int process_jiffies(unsigned long jiffies, struct timespec *output) { + if (!output) return -1; + jiffies_to_timespec(jiffies, output); + return 0; +} + +int validate_and_convert(struct buffer *input_buf, struct timespec *output) { + if (!input_buf || !output) return -1; + unsigned long jiffies = 0; + if (input_buf->size > 0) { + char *tmp = (char *)malloc(input_buf->size + 1); + if (!tmp) return -1; + memcpy(tmp, input_buf->data, input_buf->size); + tmp[input_buf->size] = '\0'; + char *endptr; + jiffies = strtoul(tmp, &endptr, 10); + if (*endptr != '\0') { + free(tmp); + return -1; + } + free(tmp); + } + return process_jiffies(jiffies, output); +} + +int main() { + struct buffer input_buf; + struct timespec ts; + if (initialize_buffer(&input_buf, 1024) != 0) { + return 1; + } + const char *test_input = "1000"; + if (append_to_buffer(&input_buf, test_input, strlen(test_input)) != 0) { + cleanup_buffer(&input_buf); + return 1; + } + if (validate_and_convert(&input_buf, &ts) != 0) { + cleanup_buffer(&input_buf); + return 1; + } + printf("Seconds: %ld\n", ts.tv_sec); + printf("Nanoseconds: %ld\n", ts.tv_nsec); + cleanup_buffer(&input_buf); + return 0; +} \ No newline at end of file diff --git a/playground/codebases/sample/sample_asan b/playground/codebases/sample/sample_asan new file mode 100755 index 0000000000000000000000000000000000000000..28b98d7a2ac7bc730415825dc5ad1e9ff63218fa GIT binary patch literal 29656 zcmeHwdw3khm2Y*=v^26FGnVAH!6U~83~2PQY-D5El4Y4uY*~tJ1qgxhXr?6%mS&WB zfSt?3iEM%}N^o|8Bx|!~H(8Pm##oAO(_Z%MctkVeaqL zqt$9_L~ibW_n%eS)2Gg1m!%l56hrZJUVY$GFVq*PEEWx+K!$_zjw zt7pZyUc^?iJm79l6XlHppj4!j`c&wVbQd7WEu_peaH$|O1(k&aNiJ7nC@S9BUB#tZgdv zOMfb;{7Gu)Q!njtLWkMRDc39IoN$Y*UO}pZ(ymw1(Z8m?4k`D+8llGt4+=6G-;g0>j&Mf3ap`K3q7IB}ZGR?y_so>KaW9Q1e&I@RyT z$C)lg!F2jR2FB^=zso`2nS854uaoF1pe(DdhSEnbaq~wgT5n&o}nD{ z`8ntx&7uE`Ip`F!r?c~_9P~$X(4Wph@6ADf5cK7+$(cq$n9G*1Qyrp&#xhT#P)~m% z9!jOd$#f{hLan>oLQykm_QXQ`^5lWirOfs%`rA<`5rPXd{%`}V_ZA$euER|07nQ=CdjK$O4 zN-h&uUJCW}CAz|WDOGZlnM{$L{jpSkI34K?MS4wGdXvpH!tpl2$i8r>I~EW3#XgKa z2qExAWOwz4V{x(|oyhc|M3@%o-5cr-$NCr;saGf_t|cTQa#w9{-P|0i4XmzXDm$60 z4XmM2p-ubGH4W8`=%?|I7)}}@;uyxDitk@Nav#E=n@vbsc`Q~;QS;9d&n%7=Pz24F zLG+Zo53Bp%Ccn@-=Aa*!5%jc!ULpC)JC^2LLRr#X&t1}h>bo^5dojG(HGe0FWBe{ zZS>jftOsU2@PF@tv)&8e4i3KV2_80{-^5t(&`4U(z7QPzrso;1EW1Vrc_O>g1>jxA zh`*OgCdPl6&1Uc6I0e*+@fR(e0_nu~sD)Diof!WY3#S{x#Q1|2PJwh{{D_590G$~B zh=o(&oEZO*g;T(s7!O-G1_gOgISSH4sESzpA6XP`&P62dcyu!jMa88WRwQvfU z6XTkNdpUmQ9o1e6kZ8Y!Q(#2a^SZ*@QefRci^T2zutj&I`B3J-r~R;9QYasUg^MBI`9h|_&hsa^Sjoe z7jFm-y&N3;_34h?Ej1%G&j*Ja-o!A-&MO1w8{Gl#vV-UZ4DjKGaZrN8#>)iOolDQd z(EkzM($LT^$e!{pJ3xHTNC^I~bN)47Ca~(A;Lz#d=LopLRWD zt@=rR{ALtVrW10QtiIsTt>=P6nbX0;EvJJ+C&m_FWCe#tfr9*WaP+sX;76Wj2eP+& 
zzgBwi-_suASxr2g0dpOEhpEAt{Kug>`&tht4UT{_hxi*Xk~5uX;48nUBrJ8aTk2(D{6mwXzo7Sh7q zTQM_G*&IF<#y_TP$kk^zT574KGn=r{;%)QOmeTm*+6+e2XZ`U9;)zZh(|zaEvROP^{47+XSm z;7(U?=$YVf^PbkBZv{`dzH--NO^CT)!csms)a*g_7+2KnYaRMo>*(vQrV|HOP{cZe zM0q#z?z3mlMuUglD=&d!bo|Mhk>KG&+=AFh<7O{thxS2D>u@I(Q1(go?ay3EbLeT& zsd3P`t+{%0=^ts32p$zF1UA#q`pr3;l!zDOwrL*nv<|n7Q1Ia5uAm`eIy*Sxy?1nj%lp7c#d8NoD8B!A%mu-w!!09p z8+i8yth84X^t7(~MS3~9sbuY`mf+)W%G5d}`XqR`x&B$kUMp&qorIRsJUtl=UM4yx zZH#^8Ex2Ux^#j*6eX!~JrW>w5Kho0;%RX)wVmi<*3;~OWy~BvW&arp38hsym{Z2jz zzHtp7H;;7?7(O;Y-9PlwU_;qj6ss|KKiQIN46esxrW}L!a$PfyLFO2P&tVJ>awAxJ z4sv7%%M>;}378#Uft7FY1b4si(IIZ`OhakOANWw>e1{Pw4vlyZj$%QLUbJX~BN~nA zYGNG{6JU_fM3(W1nz`S`lQy}T>U`);Tb&epL{%6%qvKdQjEZ4aaPWDtbl7KSCvFi0 zzE3u7x0i8MypY2G&~ww=_Yi^*`%VnLn%`LeV+ueMDgaGD)3X|D9d4t^Oj@3xb$oD$ z*YgI|GsHcsCdIVNobSGY0dW}d08{4t_SkFxf%n*G#TnJcEzs}%nvU7D)N0^~v5lbG z9Q+!Vdb05+#+IY(aeDIM;b+pAXZ3S~=d=ve+{*{43P5~}zl!^A)6lb9h9+9qz4TX1 zx##Rtt`TiTGe7c8ni_K6HH9W;-sIeQlpHyHtPxJ-5&M-|5HMQ~0E~U%EL>~Nmciqo zwGMrAGH6%eaYjy`w)d}Jm*Y+>fu>-7TF?XF^8OiymffX zisMg;JNk*SW$-1x1(QF=%V`)78^6S36}r5h>O3p}2HcfoEkfeqVJhHiv%C!zX zMjg^R^bP6(#O-|)x97KtenO)dva)l|i{NL@$l*kz<-9QdBt68#&ey7_S5Azb1^47o z8><647J}J|?-|MWQ_jbqbNPD)>&*gLGn-~TFzbO?56pUC)&sL1nDxM{2WCAm>w#Gh z%z9wf1G66Z|KS1JTQ^gx@vpA+*ZJ4_lV<<=wQCs1>;3EeW;{BOPOe{9Eu?FSuVyV6 z)2a0})vNt$>-@EK{+iYPSbQLp4s~U^L8jMEQ&UZKwbU?`{+D9A%t#_0O|AE@?2Fp> zll`UgJHqiq>i2ljS=kCphgmMna)xA11q0k&!J`b$@@SPm+e{~%!XOW7&P2+P3 z=#3;V6V-J$KI_n4{yj@eiSG`*xj0XM5*7(t9!B{ol>eM$tnwdP<=+MQ82mIkrTlTL z{B_Vb!f(_*OWyS{jTor@BGA7Cf8H`hKE>s!-It?$&MUARezWAaecUzdzS|hgb6u{3 zll1Hc-||D_vHd$HCk2+ibSY zUgr+0&b6w}=fIaiA75y%vy-dgZKFN0DU?(FR-GYHPNdm1>w#Gh%z9wf1G65O^}wtL z{;POE{k}*2o~ObuTCRStgJly>Kb0AU(|U?hkpR{Yj?`}~di|n60;YZkQz7}Wq~%Hd zW<~w(Xo;k&HM9DSiu#=qtuHC5@;BefCJ29&){;nu0Bld5$Vk9W%5wD#R7UGbB=uXG zg#u;v-&hbyiZOq?V31<$lx#n(=PBh2z|?Os#$-i`ep)tIwfmfuSHEFV_4D;BbSXZ3 z2hZCFc22^!Ezw@dfbaNKIk(`n#M81I+qZbR{WmVs)c+jNHocl^*+F9OC)V&1-y`9G zga;(NN5bP0J}TjegfB>VO2X3;;zyM{`6Tp9SS?|rgdGy@k#Kseb5qst)SMjZcWTPN zjgmqAPEGv|%qgdE_4_n6Zk4?HecHRptKX$z4+>98Ui~iZ-Q=%oZeH)dq_Zm%PiOo! 
zHG#T7b!AP4qcykH)&{EU0=27T0e_GZ6F?15S*rw>1M2J^v_+JELH@$&Nh?ZB<-Z%8 z2Gclm*&l%L=&)Kcm8YB*8$hxhn#*<5p9F>=q*IKm?Mq)0?aLal|Z~VQpsf;VLs32p-!(9 z7-Nl|e!{ExUOJe==KmYvs|&j!B$IE!BgpC3X@xhVLgw}z!}X+2)%u=68-BvG3*RJb zUUC&~1^ssK$AP)#m%bke(-!>-!1$8;<;0r}q`^zvajo z{Z&$OFGnVH+TZ5Bk0WRGwXnf`KS$ow7a;JvKgE$h=u1f3r#bSL{=Z?o`!gK*lYW?x zzvIX`{fDr|eUu}A)Q^$2V;p%${|T`@z>$abPf(?Q&yoM3pCY#79Qn3>5tTg1k?-i= zBSSySk?(rwHst;%?h4v!?CB(h4{@CKv@&-yL?6_Skz4+OyM?*OVfAMAPN9i&` z7wR+ZS9JM3UH*tmk+G15Vv(_k0#A{#m?n#s58H|<3>4F7E2hb<-GIx5#cniLyPNP& z)8(J&@)f#_(B*q{`CoK-l`e14MI+-%=(2<^bT)^!hAx}w(oUBT(j`Wh{kSa4^Uh5{ z(%%UK@)pc7P-TJ3T9aLYB4gP_>Vy0koMi%U1wz~SYhE8Gu2hwIY$6bIcg%3r?dF0PMvi~Cxz zx+<5DT6ZTEA74zx?t_&7Rt4paCx~Bka1qaY3=LxLz2I>#DZ35D#fQ)spHYaI&GMfB z-S8DrU}pLE5i)Py7m)W*E2!TMdJ#I$7zM4Q7+LpmFy$>SPlDjS4yyB(lpjI9_)X9k zYI7o_brtAZVc}z-^x{Hv5T~;_NvJDWD%h73nR6~21*c{jRQV4tAia1sk5Wq7 z(GMY)H-Lt8msOz#*Dc3yFHEOc&%B?Y*mcW++w*RK_};nHWY>|;-^M(hVBSf}?psVM z@Lm}Hr=%VIJ(TIBZ09P!h_P=mjA-*$WO(lH& zacg7VPf^L{lJ{`mjKMc4;PqtG1M;Eb7E+9)9i^ukeJh+%zl`|FsQ*dawF29tw^LaQ zXFGBYGpGvB7r?QK^p(u#Y8d|>fa?olycXaqD7I=Pcl(iRbQ;gA)>2RDM>-1VjEz;z zGz9b`LEjOn;GodM@}7n2%Y34^7ga?eTtQW}%@Zu6l2beSH>#?N<*F)@t_TR}j=YtS zUC!qbn#JbM2fA=TK4$T6KA=iF`lM2`aD`CQ%NG3>dNdy&n<`$NuqzV_n7Eh2U_n)4(AJg&GJ5 zrL6#YD_CLS#zI#KX7@#$;&JH0lV|}aRk)zPqC8>DEDCsANt++q8)EQc+>NWW%Cp z87VI$<#mg27x6G|Rz*1(QS4=f6&|VTW#zmh;m-pb} zT0^-lxVX2|g)g{`Ev!ISp~>adj76RX!~1KaSihvq@T85UOm}P77~YIgrq`4iMRd`w zHC9y{MQ06=2Jf13J3290TO_xt<3PO{I zv{#lI7k(5f0pD}PxbmY$qp12y0w2Xi&%4I(0lERb1h*NBtBmEX#wwDzAZ--KpekUL zZ!^3P8cV3&`BY+TFubV0$kRiXcT~fgjN!`|7u{r(P8hxb8r8rX)n2XNp_LjXzcC6t z+rX7J7Hl&X>a}G?fqq4qanT2id7d8`WgDP2P-c|tYf6oF*UkEB&CpzP2()7cW*T5e){uTCVRyHMO3-yFlT3`!G`aGy9QY#?d!)5G?* zh4*!Y)4kixcu%^QH0M;@T-VaEt^Y>0w=EIP^qHNVty@St@bJD}ng0H8a=+Aam6_gQ z?n}4EaaL9)LI;I)MA+UYn0pmJo@=KW{cs{a*_fPF*3|57x+>JUW804Qt9P`(A7Px| zwLKO$Ma87CiB3NY?~L{IrloUu1JT+jxp1Zi)l`%A=FVNa+uNX=v~f3XH@nkQOyota z@pLlW-gP6+JClW|BhBjw;TW}nWTM-qgBvtO$PuBJI!<~X-1u+ zX>Ut1nMk&osZ_Yf+??s|Hj~`FXl?|(YqCyCR7J*>}|SyL)k%oKPxyrDY$O zaC}-vGlm_@#8a`JxEb|Fdc#SfttpA7G$#^!V@eIx(VU2%}lnpZx(%u zGx2)-{9>u22m4T{_hxD_xkXMD478cLd>uK2yQ5`BJ~8m5=iY_&it~E5rHhH_Iyp>t z^@gK~o39EFu*sQf^KDx!I%TlT)P~yX>E>5UzfB$QCv-Os;S&zDVgXChluh;GkU+n~ zmvE$rCh#k~YbGNmKje??oirK!Evmt6;GG;a!vms<$+YO1Y*3{ZZ9vWs1FA){Zo*f9L!CT*gME@#;L#gvde0) zO_0Bs_gD6(dQ<7Wfay%!Z0IpDCt?wx;bf$@K^(#Auk7(xw%7VAQ|V|!PdvlrE4w>d zoBfsD@kC`;Cf1jZ#RX#SlYN>Jcru-iA(!Znr7OGX{!%%R;9U=5h6kz2G#y1r>^H?y zv94HOEWN*>Hx`YWQe|u)Tp5dWgCWvgi6BVlK_cKcbZ7edKn$n$$0NPTL>%+F(y1~< z_cbMyNTLZ~M02`DL8LF7>Q%+isYV_~DILxjrh3|wI6PBzekziQCYacX$>{b5zEC7Q z5RSlVhT#;8hSNBsG9C?Kmn243nur+_ymC(_yD`??9Wzs*bRv|F^_!^yGosF%k1VY+Or7^?NWHHm% z))7~@+YCixH-$1l=|u&W7yDCKFZ6{{6nRPRrcfMvLs62!AX!}tQg~nh?j~>HZ31o_ z#6v9nq|CT^^Q8Bo8uK-YlSfmTzO;1!sq7^LC8dF%PdXX!Q4^D=ltvI9;+X-(Nb1p_ zVRaa&2x`Qi$fT)pR3*k~6qS)Z7%0+o-e4GFt5u$&8VFQb|5M2boqDW#Fe+DNM{`$$|MAT3aX2PsU6=UpcetMrPXTXXda> z#qjRODYNKQ8PY{S$4wb0wsxliy|xm;I)%iiAHV&BOs%%Wr{LXKAJ|K*UB6YmiGH&x z+-L4hRo$FO?oD9^npHO@aL1^`d9zgmec}B*$wVd|t%@X~CXP)tQ&mEmRb^61@(j7K z>O7a#)Ktadk-kjS6pRu%pRtQpyj<6eELG|K1Ljml%ojwrZnTt_kz>+PjH*g};vAzj ztrZ<6%V|;whG;Ay7-R7sQH};7Gee?hOE)`{b+X`X7@m0D>~8s<)GrzEgg-4meZ!?Q zA3LqSAeQ`oN%xbWEzwgn@vHqGs;Cti4d)mCxkKT)yIS^$>IKQEzN!>;UK zQ^Or;I(vS{dCHhNm)#P9Lo}Q`uJ(y2I-O^&F}w|-*YURG!Ak)@oxm#nsfw0?zZCX6 z-_aIvLJJL)l$9 z_#enY{~YLZ^?BC5A-v1u>4_Zt^v>T=CFlP+=^;;43wgXY=IQku{CV)RZ<>B7lm1t4 zZ%#-(D?wi_^LEe(`gG@g(*_0VN9Pv}w-CR+*gBtA>AF+Wo%>Jjm3o}t4bZoTq~H0* z{4vmdw)4l8{!z}auu1r{9P~3e=y`}oRIl@U9QuNi=+5s)sz4`u#vJ3nj`L62p>jKN z(7UC6=NAp>9Q=3YpwqWC)6JKE1f9l%IyYI_^Upc>pUOdhHV6GzIp}ZYpfAJ#CVx7= 
zd|Qk0T&iQAjZax9;jTzBT@y%Pr4#Oog=pId2&ptyXMqTtJZL_Y?hi$<%EHYL&makT z{COyfbK_H?aAqIFvH(BfNt@9Co%ucu2c3}~3x(-fK7@Bv$^AH49#0UVC|(88iSjlK zi-cHu3M*e!*jG{3+98z6bcMEr@H41&>*%<7LR%$Dhr(~&+0@n&YT2=c4$lu^O&)3q zN@8%!P8Paqd;8|5?V#Y81-o(k1?Pvaj0PQ$*9*Yq=Y9H^9=x+z8lkcbg zd91!=-L!u?U|M4R^pASeJ=$U9(t9}GQ@Q_&fby~RM*^lkJDlIs zHR7)<$bu=u1Vh^7Pc2iW_}^TpQI9g`{nY`>3-~Jn@(&PLBwM%T{&NTXfs6%G`}^@g z4~Rcvz#-l*h#VHLW^#Z9;&_4&;OQiQ-W|Xb@_rkMKj~D)qLS3KxfQPmdKhQ##j`pK zMEA#0g@AaY!Aqo3Q;-nKl4f6+1Y~XiPlW*r6fA&jpeG?S^f(zv<8?9%P{aXug8MIE z_R1L38-+H7L0Y`p7V$=?RQbt}#Z3(N<7Ik4Xu}9(0eTifECbE#%Je`g9PdGdlxQs8 zolyC3R~O#HDhz)U$5eSZRAq^bVV!h@=)VS~tqutJSnGbtPVVdJhnM z-Dk)`=>n}y1olJK5UMa8O3dJa)T6ZZjJx|!>)$@#ke#I!EQ=T>;;kdtq zYX76Meg)OKAK$w3-IR*J| zbCoclS{5nzkVAez?h98?@hKGwSMX7X`~fMiV54k*F8QxJ`u)kN4Im;p{jKB_B;&o{ zQ}wIgr=0!^@OM~NxB74DbyS1W>qO1{!TCtE2g zxJt?k>g2V!N#r{wNj#_I6uCZ!{Pj})t~rWRK&Skc9P+Qy1{I_Sm16{I*)37FGl%@0 z?SkVS$M|>Fe}j~F`tOdNLVkaNs!2d4uV5^PeD*rQPg|4iNy*uvgN3v2xj{&&```+o rv^BgYXf+JB5uvJ??byf8$oWkvN@;~nt9Qz#t4wU^L?@|r- literal 0 HcmV?d00001 diff --git a/sample_client.py b/sample_client.py new file mode 100644 index 0000000..407c418 --- /dev/null +++ b/sample_client.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python3 +""" +Sample MCP Client for Joern MCP Server + +This demonstrates how to use all the tools available in the joern-mcp server. +""" + +import asyncio +import logging +import sys + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') +logger = logging.getLogger(__name__) + +try: + from fastmcp import Client +except ImportError: + logger.error("FastMCP not found. Install with: pip install fastmcp") + sys.exit(1) + + +def extract_tool_result(result): + """Extract dictionary data from CallToolResult""" + if hasattr(result, 'content') and result.content: + content_text = result.content[0].text + try: + import json + return json.loads(content_text) + except: + return {"error": content_text} + return {} + + +async def demonstrate_joern_mcp(): + """Demonstrate all Joern MCP tools""" + server_url = "http://localhost:4242/mcp" + + async with Client(server_url) as client: + logger.info("🔌 Connected to Joern MCP Server") + + # 1. Test server connectivity + await client.ping() + logger.info("✅ Server ping successful") + + # 2. List available tools + tools = await client.list_tools() + logger.info(f"📋 Available tools: {[tool.name for tool in tools]}") + + # 3. Create a CPG session from local source + logger.info("\n📁 Creating CPG session...") + session_result = await client.call_tool("create_cpg_session", { + "source_type": "local", + "source_path": "playground/codebases/sample", + "language": "c" + }) + + session_dict = extract_tool_result(session_result) + + if not session_dict.get("session_id"): + logger.error(f"❌ Session creation failed: {session_dict}") + return + + session_id = session_dict["session_id"] + logger.info(f"✅ Session created: {session_id}") + + # 4. Wait for CPG to be ready + logger.info("⏳ Waiting for CPG generation...") + for i in range(30): + status_result = await client.call_tool("get_session_status", { + "session_id": session_id + }) + + status_dict = extract_tool_result(status_result) + status = status_dict.get("status") + logger.info(f" Status: {status}") + + if status == "ready": + logger.info("✅ CPG is ready") + break + elif status == "error": + logger.error(f"❌ CPG generation failed: {status_dict.get('error_message')}") + return + + await asyncio.sleep(10) + else: + logger.error("❌ Timeout waiting for CPG") + return + + # 5. 
Run synchronous CPGQL queries + logger.info("\n🔍 Running synchronous queries...") + queries = [ + "cpg.method.name", + "cpg.call.name", + "cpg.file.name" + ] + + for query in queries: + result = await client.call_tool("run_cpgql_query", { + "session_id": session_id, + "query": query, + "timeout": 30 + }) + + result_dict = extract_tool_result(result) + + if result_dict.get("success"): + count = result_dict.get("row_count", 0) + time_taken = result_dict.get("execution_time", 0) + logger.info(f" ✅ {query}: {count} results in {time_taken:.2f}s") + + # Show sample data + if result_dict.get("data") and len(result_dict["data"]) > 0: + data = result_dict["data"] + logger.info(f" First 5 results:") + for i, item in enumerate(data[:5]): + if isinstance(item, dict) and "value" in item: + logger.info(f" {i+1}. {item['value']}") + else: + logger.info(f" {i+1}. {str(item)[:80]}...") + if count > 5: + logger.info(f" ... and {count - 5} more") + else: + logger.error(f" ❌ {query}: {result_dict.get('error')}") + + # 6. Run asynchronous query + logger.info("\n⚡ Running asynchronous query...") + async_result = await client.call_tool("run_cpgql_query_async", { + "session_id": session_id, + "query": "cpg.method.parameter.name", + "timeout": 60 + }) + + async_dict = extract_tool_result(async_result) + + if async_dict.get("success"): + query_id = async_dict["query_id"] + logger.info(f" ✅ Async query started: {query_id}") + + # Monitor query status + for i in range(10): + status_result = await client.call_tool("get_query_status", { + "query_id": query_id + }) + + status_dict = extract_tool_result(status_result) + query_status = status_dict.get("status") + logger.info(f" Status: {query_status}") + + if query_status == "completed": + # Get results + result = await client.call_tool("get_query_result", { + "query_id": query_id + }) + + result_dict = extract_tool_result(result) + + if result_dict.get("success"): + count = result_dict.get("row_count", 0) + logger.info(f" ✅ Async query completed: {count} results") + + # Show sample parameter names + if result_dict.get("data") and len(result_dict["data"]) > 0: + data = result_dict["data"] + logger.info(f" Sample parameter names:") + for i, item in enumerate(data[:8]): + if isinstance(item, dict) and "value" in item: + logger.info(f" {i+1}. {item['value']}") + else: + logger.info(f" {i+1}. {str(item)[:50]}...") + if count > 8: + logger.info(f" ... and {count - 8} more") + break + elif query_status == "failed": + logger.error(f" ❌ Async query failed: {status_dict.get('error')}") + break + + await asyncio.sleep(5) + + # 7. List all queries + logger.info("\n📊 Listing queries...") + queries_result = await client.call_tool("list_queries") + queries_dict = extract_tool_result(queries_result) + if queries_dict.get("success"): + total = queries_dict.get("total", 0) + logger.info(f" Total queries: {total}") + + # 8. List all sessions + logger.info("\n📋 Listing sessions...") + sessions_result = await client.call_tool("list_sessions") + sessions_dict = extract_tool_result(sessions_result) + if sessions_dict.get("sessions"): + total = sessions_dict.get("total", 0) + logger.info(f" Total sessions: {total}") + + for session in sessions_dict["sessions"]: + logger.info(f" {session['session_id']}: {session['status']} ({session['language']})") + + # 9. 
Filter sessions by status + logger.info("\n🔎 Filtering sessions...") + ready_sessions_result = await client.call_tool("list_sessions", {"status": "ready"}) + ready_sessions_dict = extract_tool_result(ready_sessions_result) + if ready_sessions_dict.get("sessions"): + count = len(ready_sessions_dict["sessions"]) + logger.info(f" Ready sessions: {count}") + + # 10. GitHub session example (commented out to avoid actual cloning) + """ + logger.info("\n🐙 Creating GitHub session...") + github_result = await client.call_tool("create_cpg_session", { + "source_type": "github", + "source_path": "https://github.com/joernio/sample-repo", + "language": "java", + "branch": "main" + }) + logger.info(f"GitHub session: {github_result}") + """ + + # 11. Cleanup queries + logger.info("\n🧹 Cleaning up queries...") + cleanup_result = await client.call_tool("cleanup_queries", { + "max_age_hours": 0 # Clean all + }) + + cleanup_dict = extract_tool_result(cleanup_result) + + if cleanup_dict.get("success"): + cleaned = cleanup_dict.get("cleaned_up", 0) + logger.info(f" ✅ Cleaned up {cleaned} queries") + + # 12. Close session + logger.info(f"\n🔒 Closing session {session_id}...") + close_result = await client.call_tool("close_session", { + "session_id": session_id + }) + + close_dict = extract_tool_result(close_result) + + if close_dict.get("success"): + logger.info(" ✅ Session closed successfully") + else: + logger.error(f" ❌ Failed to close session: {close_dict}") + + logger.info("\n🎉 All Joern MCP tools demonstrated successfully!") + + +async def main(): + """Main function""" + try: + await demonstrate_joern_mcp() + except Exception as e: + logger.error(f"❌ Demo failed: {e}") + raise + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..e240f72 --- /dev/null +++ b/setup.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Setup script for Joern MCP Server +# This script builds the Joern Docker image and starts Redis + +set -e + +echo "🕷️ Setting up Joern MCP Server..." +echo "" + +# Check if Docker is running +if ! docker info > /dev/null 2>&1; then + echo "❌ Docker is not running. Please start Docker and try again." + exit 1 +fi + +# Build Joern image +echo "🏗️ Building Joern Docker image..." +docker build -f Dockerfile.joern -t joern:latest . --progress=plain + +# Verify the image was built +if docker images | grep -q "joern.*latest"; then + echo "✅ Joern image built successfully!" +else + echo "❌ Failed to build Joern image" + exit 1 +fi + +echo "" + +# Start Redis +echo "🚀 Starting Redis container..." + +# Check if Redis container already exists +if docker ps -a --format '{{.Names}}' | grep -q "^joern-redis$"; then + echo "ℹ️ Redis container already exists" + + # Check if it's running + if docker ps --format '{{.Names}}' | grep -q "^joern-redis$"; then + echo "✅ Redis is already running" + else + echo "▶️ Starting existing Redis container..." + docker start joern-redis + echo "✅ Redis started" + fi +else + # Create and start new Redis container + docker run -d \ + --name joern-redis \ + -p 6379:6379 \ + --restart unless-stopped \ + redis:7-alpine + echo "✅ Redis container created and started" +fi + +echo "" + +# Test Redis connection +echo "🔍 Testing Redis connection..." +if docker exec joern-redis redis-cli ping > /dev/null 2>&1; then + echo "✅ Redis is responding" +else + echo "⚠️ Redis may not be ready yet, give it a few seconds" +fi + +echo "" +echo "═══════════════════════════════════════════" +echo "✅ Setup complete!" 
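+# Note: once the server is started with `python main.py`, MCP clients connect to the
+# Streamable-HTTP endpoint at http://localhost:4242/mcp (see sample_client.py).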
+echo "═══════════════════════════════════════════" +echo "" +echo "📊 Image sizes:" +docker images joern:latest redis:7-alpine --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}" +echo "" +echo "🚀 Next steps:" +echo " 1. (Optional) Configure: cp config.example.yaml config.yaml" +echo " 2. Run server: python main.py" +echo " 3. Server will be available at http://localhost:4242" +echo "" diff --git a/src/exceptions.py b/src/exceptions.py new file mode 100644 index 0000000..93a7e70 --- /dev/null +++ b/src/exceptions.py @@ -0,0 +1,48 @@ +""" +Custom exceptions for Joern MCP Server +""" + + +class JoernMCPError(Exception): + """Base exception for Joern MCP""" + pass + + +class SessionNotFoundError(JoernMCPError): + """Session does not exist""" + pass + + +class SessionNotReadyError(JoernMCPError): + """Session is not in ready state""" + pass + + +class CPGGenerationError(JoernMCPError): + """CPG generation failed""" + pass + + +class QueryExecutionError(JoernMCPError): + """Query execution failed""" + pass + + +class DockerError(JoernMCPError): + """Docker operation failed""" + pass + + +class ResourceLimitError(JoernMCPError): + """Resource limit exceeded""" + pass + + +class ValidationError(JoernMCPError): + """Input validation failed""" + pass + + +class GitOperationError(JoernMCPError): + """Git operation failed""" + pass diff --git a/src/services/__init__.py b/src/services/__init__.py new file mode 100644 index 0000000..470196e --- /dev/null +++ b/src/services/__init__.py @@ -0,0 +1,16 @@ +""" +Services package for Joern MCP +""" +from .session_manager import SessionManager +from .git_manager import GitManager +from .cpg_generator import CPGGenerator +from .query_executor import QueryExecutor +from .docker_orchestrator import DockerOrchestrator + +__all__ = [ + 'SessionManager', + 'GitManager', + 'CPGGenerator', + 'QueryExecutor', + 'DockerOrchestrator' +] diff --git a/src/services/cpg_generator.py b/src/services/cpg_generator.py new file mode 100644 index 0000000..79d1650 --- /dev/null +++ b/src/services/cpg_generator.py @@ -0,0 +1,313 @@ +""" +CPG Generator for creating Code Property Graphs using Docker containers +""" +import asyncio +import logging +import os +import docker +from typing import AsyncIterator, Optional, Dict + +from ..models import CPGConfig, SessionStatus +from ..exceptions import CPGGenerationError +from .session_manager import SessionManager + +logger = logging.getLogger(__name__) + + +class CPGGenerator: + """Generates CPG from source code using Docker containers""" + + # Language-specific Joern commands + LANGUAGE_COMMANDS = { + "java": "javasrc2cpg", + "c": "c2cpg", + "cpp": "c2cpg", + "javascript": "jssrc2cpg", + "python": "pysrc2cpg", + "go": "gosrc2cpg", + "kotlin": "kotlin2cpg", + } + + def __init__( + self, + config: CPGConfig, + session_manager: Optional[SessionManager] = None + ): + self.config = config + self.session_manager = session_manager + self.docker_client: Optional[docker.DockerClient] = None + self.session_containers: Dict[str, str] = {} # session_id -> container_id + + async def initialize(self): + """Initialize Docker client""" + try: + self.docker_client = docker.from_env() + self.docker_client.ping() + logger.info("CPG Generator Docker client initialized") + except Exception as e: + logger.error(f"Failed to initialize Docker client: {e}") + raise CPGGenerationError(f"Docker initialization failed: {str(e)}") + + async def create_session_container( + self, + session_id: str, + workspace_path: str + ) -> str: + """Create a new Docker container 
for a session""" + try: + container_name = f"joern-session-{session_id}" + + # Container configuration for interactive Joern shell + container_config = { + "image": "joern:latest", + "name": container_name, + "detach": True, + "volumes": { + workspace_path: { + "bind": "/workspace", + "mode": "rw" + } + }, + "working_dir": "/workspace", + "environment": { + "JAVA_OPTS": "-Xmx4g" + }, + "command": "tail -f /dev/null", # Keep container running + "network_mode": "bridge" + } + + container = self.docker_client.containers.run(**container_config) + container_id = container.id + + self.session_containers[session_id] = container_id + logger.info(f"Created container {container_id} for session {session_id}") + + return container_id + + except Exception as e: + logger.error(f"Failed to create container for session {session_id}: {e}") + raise CPGGenerationError(f"Container creation failed: {str(e)}") + + async def generate_cpg( + self, + session_id: str, + source_path: str, + language: str + ) -> str: + """Generate CPG from source code in container""" + try: + logger.info(f"Starting CPG generation for session {session_id}") + + if self.session_manager: + await self.session_manager.update_status( + session_id, + SessionStatus.GENERATING.value + ) + + container_id = self.session_containers.get(session_id) + if not container_id: + raise CPGGenerationError(f"No container found for session {session_id}") + + container = self.docker_client.containers.get(container_id) + + # Generate CPG using Joern - store in playground/cpgs directory + cpg_filename = f"{session_id}.cpg" + cpg_output_path = f"/playground/cpgs/{cpg_filename}" + base_cmd = self.LANGUAGE_COMMANDS[language] + joern_cmd = await self._find_joern_executable(container, base_cmd) + command = f"{joern_cmd} /workspace -o {cpg_output_path}" + + logger.info(f"Executing CPG generation command: {command}") + + # Execute with timeout + try: + result = await asyncio.wait_for( + self._exec_command_async(container, command), + timeout=self.config.generation_timeout + ) + + logger.info(f"CPG generation output:\n{result}") + + # Validate CPG was created + if await self._validate_cpg_async(container, cpg_output_path): + if self.session_manager: + await self.session_manager.update_session( + session_id, + status=SessionStatus.READY.value, + cpg_path=cpg_output_path + ) + logger.info(f"CPG generation completed for session {session_id}") + return cpg_output_path + else: + error_msg = "CPG file was not created" + logger.error(error_msg) + if self.session_manager: + await self.session_manager.update_status( + session_id, + SessionStatus.ERROR.value, + error_msg + ) + raise CPGGenerationError(error_msg) + + except asyncio.TimeoutError: + error_msg = f"CPG generation timed out after {self.config.generation_timeout}s" + logger.error(error_msg) + if self.session_manager: + await self.session_manager.update_status( + session_id, + SessionStatus.ERROR.value, + error_msg + ) + raise CPGGenerationError(error_msg) + + except CPGGenerationError: + raise + except Exception as e: + error_msg = f"CPG generation failed: {str(e)}" + logger.error(error_msg) + if self.session_manager: + await self.session_manager.update_status( + session_id, + SessionStatus.ERROR.value, + error_msg + ) + raise CPGGenerationError(error_msg) + + async def _find_joern_executable(self, container, base_cmd: str) -> str: + """Find the correct path for Joern executable in container""" + try: + possible_paths = [ + f"/opt/joern/joern-cli/{base_cmd}", # Most likely location + 
f"/opt/joern/joern-cli/{base_cmd}.sh", # Shell script version + f"/opt/joern/bin/{base_cmd}", # Alternative location + f"/usr/local/bin/{base_cmd}", # System location + base_cmd # In PATH + ] + + loop = asyncio.get_event_loop() + + for path in possible_paths: + def _test_path(): + result = container.exec_run(f"test -x {path}") + return result.exit_code + + exit_code = await loop.run_in_executor(None, _test_path) + if exit_code == 0: + logger.info(f"Found Joern executable at: {path}") + return path + + # Fallback - list what's available in the joern-cli directory + logger.warning("Joern executable not found in expected paths, listing available commands...") + + def _find_commands(): + result = container.exec_run("ls -la /opt/joern/joern-cli/ | grep -E '(c2cpg|javasrc2cpg|pysrc2cpg|jssrc2cpg)' || echo 'Joern CLI tools not found'") + return result.output.decode('utf-8', errors='ignore') + + available = await loop.run_in_executor(None, _find_commands) + logger.info(f"Available Joern CLI tools: {available}") + + # Since the tools should be in PATH with our updated Dockerfile, try the base command + logger.info(f"Using base command in PATH: {base_cmd}") + return base_cmd + + except Exception as e: + logger.error(f"Error finding Joern executable: {e}") + return base_cmd + + async def _exec_command_async(self, container, command: str) -> str: + """Execute command in container asynchronously""" + loop = asyncio.get_event_loop() + + def _exec_sync(): + result = container.exec_run(command, workdir="/workspace") + return result.output.decode('utf-8', errors='ignore') + + return await loop.run_in_executor(None, _exec_sync) + + async def _validate_cpg_async(self, container, cpg_path: str) -> bool: + """Validate that CPG file was created successfully""" + try: + loop = asyncio.get_event_loop() + + def _check_file(): + # Check if file exists and get size using a more compatible command + result = container.exec_run(f"ls -la {cpg_path}") + return result.output.decode('utf-8', errors='ignore').strip() + + ls_result = await loop.run_in_executor(None, _check_file) + + # If ls succeeded and doesn't show "No such file", the file exists + if "No such file" not in ls_result and cpg_path in ls_result: + logger.info(f"CPG file created: {ls_result}") + return True + else: + logger.error(f"CPG file not found: {ls_result}") + return False + + except Exception as e: + logger.error(f"CPG validation failed: {e}") + return False + + async def get_container_id(self, session_id: str) -> Optional[str]: + """Get container ID for session""" + return self.session_containers.get(session_id) + + def register_session_container(self, session_id: str, container_id: str): + """Register an externally created container with a session""" + self.session_containers[session_id] = container_id + logger.info(f"Registered container {container_id} for session {session_id}") + + async def close_session(self, session_id: str): + """Close session container""" + container_id = self.session_containers.get(session_id) + if container_id: + try: + container = self.docker_client.containers.get(container_id) + container.stop() + container.remove() + logger.info(f"Closed container {container_id} for session {session_id}") + except Exception as e: + logger.warning(f"Error closing container for session {session_id}: {e}") + finally: + del self.session_containers[session_id] + + async def cleanup(self): + """Cleanup all session containers""" + sessions = list(self.session_containers.keys()) + for session_id in sessions: + await 
self.close_session(session_id) + + async def stream_logs( + self, + session_id: str, + source_path: str, + language: str, + output_path: str + ) -> AsyncIterator[str]: + """Generate CPG and stream logs""" + try: + container_id = self.session_containers.get(session_id) + if not container_id: + yield f"ERROR: No container found for session {session_id}\n" + return + + container = self.docker_client.containers.get(container_id) + + # Get the Joern command for the language + if language not in self.LANGUAGE_COMMANDS: + yield f"ERROR: Unsupported language: {language}\n" + return + + base_cmd = self.LANGUAGE_COMMANDS[language] + joern_cmd = await self._find_joern_executable(container, base_cmd) + command = f"{joern_cmd} {source_path} -o {output_path}" + + # Execute command and stream output + exec_result = container.exec_run(command, stream=True, workdir="/workspace") + + for line in exec_result.output: + yield line.decode('utf-8', errors='ignore') + + except Exception as e: + logger.error(f"Failed to stream logs: {e}") + yield f"ERROR: {str(e)}\n" diff --git a/src/services/docker_orchestrator.py b/src/services/docker_orchestrator.py new file mode 100644 index 0000000..2efa020 --- /dev/null +++ b/src/services/docker_orchestrator.py @@ -0,0 +1,108 @@ +""" +Docker orchestration for Joern MCP Server +""" +import logging +import os +import docker +from typing import Optional + +logger = logging.getLogger(__name__) + + +class DockerOrchestrator: + """Manages Docker containers for Joern CPG generation and analysis""" + + def __init__(self): + self.client: Optional[docker.DockerClient] = None + + async def initialize(self): + """Initialize Docker client""" + try: + self.client = docker.from_env() + self.client.ping() + logger.info("Docker client initialized successfully") + except Exception as e: + logger.error(f"Failed to initialize Docker client: {e}") + raise + + async def start_container( + self, + session_id: str, + workspace_path: str, + playground_path: str + ) -> str: + """Start a Docker container for the session""" + try: + if not self.client: + raise RuntimeError("Docker client not initialized") + + # Ensure directories exist + os.makedirs(workspace_path, exist_ok=True) + os.makedirs(playground_path, exist_ok=True) + + # Container configuration + container_name = f"joern-session-{session_id}" + + # Mount both workspace and playground + volumes = { + workspace_path: {'bind': '/workspace', 'mode': 'rw'}, + playground_path: {'bind': '/playground', 'mode': 'rw'} + } + + # Start container with Joern image + container = self.client.containers.run( + image="joern:latest", + name=container_name, + volumes=volumes, + detach=True, + remove=False, # Keep container for debugging + working_dir="/workspace", + command="sleep infinity" # Keep container running + ) + + logger.info(f"Started container {container.id} for session {session_id}") + return container.id + + except Exception as e: + logger.error(f"Failed to start container for session {session_id}: {e}") + raise + + async def stop_container(self, container_id: str): + """Stop and remove a Docker container""" + try: + if not self.client: + logger.warning("Docker client not initialized, cannot stop container") + return + + container = self.client.containers.get(container_id) + container.stop(timeout=10) + container.remove() + + logger.info(f"Stopped and removed container {container_id}") + + except docker.errors.NotFound: + logger.warning(f"Container {container_id} not found, may already be removed") + except Exception as e: + logger.error(f"Failed to 
stop container {container_id}: {e}") + + async def cleanup(self): + """Cleanup all running containers""" + try: + if not self.client: + return + + # Find all containers with joern-session prefix + containers = self.client.containers.list( + filters={"name": "joern-session-*"} + ) + + for container in containers: + try: + container.stop(timeout=5) + container.remove() + logger.info(f"Cleaned up container {container.id}") + except Exception as e: + logger.error(f"Failed to cleanup container {container.id}: {e}") + + except Exception as e: + logger.error(f"Error during Docker cleanup: {e}") \ No newline at end of file diff --git a/src/services/git_manager.py b/src/services/git_manager.py new file mode 100644 index 0000000..28107b2 --- /dev/null +++ b/src/services/git_manager.py @@ -0,0 +1,131 @@ +""" +Git repository manager for cloning and managing GitHub repositories +""" +import asyncio +import logging +import os +import shutil +from typing import Optional, Dict +from urllib.parse import urlparse +import git + +from ..exceptions import GitOperationError, ValidationError +from ..utils.validators import validate_github_url + +logger = logging.getLogger(__name__) + + +class GitManager: + """Handles GitHub repository operations""" + + def __init__(self, workspace_root: str): + self.workspace_root = workspace_root + self.repos_dir = os.path.join(workspace_root, "repos") + os.makedirs(self.repos_dir, exist_ok=True) + + async def clone_repository( + self, + repo_url: str, + target_path: str, + branch: Optional[str] = None, + token: Optional[str] = None + ) -> str: + """Clone a GitHub repository""" + try: + # Validate URL + validate_github_url(repo_url) + + # Parse URL and inject token if provided + if token: + parsed = urlparse(repo_url) + auth_url = f"{parsed.scheme}://{token}@{parsed.netloc}{parsed.path}" + else: + auth_url = repo_url + + # Create target directory + os.makedirs(target_path, exist_ok=True) + source_path = os.path.join(target_path, "source") + + # Clone in a thread pool (git operations are blocking) + loop = asyncio.get_event_loop() + await loop.run_in_executor( + None, + self._do_clone, + auth_url, + source_path, + branch + ) + + logger.info(f"Cloned repository {repo_url} to {source_path}") + return source_path + + except ValidationError: + raise + except Exception as e: + logger.error(f"Failed to clone repository: {e}") + raise GitOperationError(f"Failed to clone repository: {str(e)}") + + def _do_clone(self, url: str, target: str, branch: Optional[str]): + """Blocking clone operation""" + try: + if branch: + git.Repo.clone_from(url, target, branch=branch, depth=1) + else: + git.Repo.clone_from(url, target, depth=1) + except Exception as e: + raise GitOperationError(f"Git clone failed: {str(e)}") + + async def validate_repository(self, repo_url: str) -> bool: + """Validate that repository exists and is accessible""" + try: + validate_github_url(repo_url) + # Could add additional checks here (API call to check if repo exists) + return True + except Exception as e: + logger.error(f"Repository validation failed: {e}") + return False + + async def get_repository_info(self, repo_url: str) -> Dict: + """Get repository information""" + try: + validate_github_url(repo_url) + parsed = urlparse(repo_url) + parts = parsed.path.strip('/').split('/') + + return { + "owner": parts[0] if len(parts) > 0 else "", + "repo": parts[1] if len(parts) > 1 else "", + "url": repo_url + } + except Exception as e: + logger.error(f"Failed to get repository info: {e}") + raise GitOperationError(f"Failed to 
parse repository URL: {str(e)}") + + def parse_github_url(self, url: str) -> Dict: + """Parse GitHub URL into components""" + try: + validate_github_url(url) + parsed = urlparse(url) + parts = parsed.path.strip('/').split('/') + + # Remove .git suffix if present + repo = parts[1].replace('.git', '') if len(parts) > 1 else "" + + return { + "owner": parts[0] if len(parts) > 0 else "", + "repo": repo, + "host": parsed.netloc, + "scheme": parsed.scheme + } + except Exception as e: + logger.error(f"Failed to parse GitHub URL: {e}") + raise GitOperationError(f"Invalid GitHub URL: {str(e)}") + + async def cleanup_repository(self, target_path: str): + """Clean up cloned repository""" + try: + if os.path.exists(target_path): + shutil.rmtree(target_path) + logger.info(f"Cleaned up repository at {target_path}") + except Exception as e: + logger.error(f"Failed to cleanup repository: {e}") diff --git a/src/services/query_executor.py b/src/services/query_executor.py new file mode 100644 index 0000000..5465766 --- /dev/null +++ b/src/services/query_executor.py @@ -0,0 +1,560 @@ +""" +Interactive query executor for running CPGQL queries with persistent Joern shell in Docker containers +""" +import asyncio +import json +import logging +import os +import tempfile +import time +import uuid +import docker +from typing import Optional, Dict, Any +from enum import Enum + +from ..models import QueryResult, QueryConfig, JoernConfig +from ..exceptions import QueryExecutionError +from ..utils.redis_client import RedisClient +from ..utils.validators import validate_cpgql_query, hash_query + +logger = logging.getLogger(__name__) + + +class QueryStatus(str, Enum): + """Query execution status""" + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + + +class QueryExecutor: + """Executes CPGQL queries using persistent Joern shells in Docker containers""" + + def __init__( + self, + config: QueryConfig, + joern_config: JoernConfig, + redis_client: Optional[RedisClient] = None, + cpg_generator=None + ): + self.config = config + self.joern_config = joern_config + self.redis = redis_client + self.cpg_generator = cpg_generator + self.docker_client: Optional[docker.DockerClient] = None + self.session_containers: Dict[str, str] = {} # session_id -> container_id + self.session_cpgs: Dict[str, str] = {} + self.query_status: Dict[str, Dict[str, Any]] = {} # query_id -> status info + + async def initialize(self): + """Initialize Docker client""" + try: + self.docker_client = docker.from_env() + logger.info("QueryExecutor initialized with Docker client") + except Exception as e: + logger.error(f"Failed to initialize Docker client: {e}") + raise QueryExecutionError(f"Docker initialization failed: {str(e)}") + + def _get_joern_command(self) -> str: + """Get the correct joern command path""" + # With our updated Dockerfile, joern should be in PATH + # But we can also specify the full path as fallback + return "joern" + + def set_cpg_generator(self, cpg_generator): + """Set reference to CPG generator""" + self.cpg_generator = cpg_generator + + async def execute_query_async( + self, + session_id: str, + cpg_path: str, + query: str, + timeout: Optional[int] = None + ) -> str: + """Execute a CPGQL query asynchronously and return query UUID""" + try: + # Generate unique query ID + query_id = str(uuid.uuid4()) + + # Validate query + validate_cpgql_query(query) + + # Use container CPG path consistently + container_cpg_path = "/workspace/cpg.bin" + + # Normalize query to ensure JSON output and pipe to 
file + query_normalized = self._normalize_query_for_json(query.strip()) + output_file = f"/tmp/query_{query_id}.json" + query_with_pipe = f"{query_normalized} #> \"{output_file}\"" + + # Initialize query status + self.query_status[query_id] = { + "status": QueryStatus.PENDING.value, + "session_id": session_id, + "query": query, + "output_file": output_file, + "created_at": time.time(), + "error": None + } + + # Start async execution + asyncio.create_task(self._execute_query_background(query_id, session_id, container_cpg_path, query_with_pipe, timeout)) + + logger.info(f"Started async query {query_id} for session {session_id}") + return query_id + + except Exception as e: + logger.error(f"Failed to start async query: {e}") + raise QueryExecutionError(f"Query initialization failed: {str(e)}") + + async def _execute_query_background( + self, + query_id: str, + session_id: str, + cpg_path: str, + query_with_pipe: str, + timeout: Optional[int] + ): + """Execute query in background""" + try: + # Update status to running + self.query_status[query_id]["status"] = QueryStatus.RUNNING.value + self.query_status[query_id]["started_at"] = time.time() + + # Execute query using the same approach as sync queries + result = await self._execute_query_in_shell(session_id, query_with_pipe.split(' #>')[0], timeout or self.config.timeout) + + if result.success: + # Update status to completed + self.query_status[query_id]["status"] = QueryStatus.COMPLETED.value + self.query_status[query_id]["completed_at"] = time.time() + self.query_status[query_id]["result"] = result.to_dict() + logger.info(f"Query {query_id} completed successfully") + else: + # Update status to failed + self.query_status[query_id]["status"] = QueryStatus.FAILED.value + self.query_status[query_id]["error"] = result.error + self.query_status[query_id]["completed_at"] = time.time() + logger.error(f"Query {query_id} failed: {result.error}") + + except Exception as e: + # Update status to failed + self.query_status[query_id]["status"] = QueryStatus.FAILED.value + self.query_status[query_id]["error"] = str(e) + self.query_status[query_id]["completed_at"] = time.time() + + logger.error(f"Query {query_id} failed: {e}") + + async def get_query_status(self, query_id: str) -> Dict[str, Any]: + """Get status of a query""" + if query_id not in self.query_status: + raise QueryExecutionError(f"Query {query_id} not found") + + status_info = self.query_status[query_id].copy() + + # Add execution time if completed + if "completed_at" in status_info and "started_at" in status_info: + status_info["execution_time"] = status_info["completed_at"] - status_info["started_at"] + + return status_info + + async def get_query_result(self, query_id: str) -> QueryResult: + """Get result of a completed query""" + if query_id not in self.query_status: + raise QueryExecutionError(f"Query {query_id} not found") + + status_info = self.query_status[query_id] + + if status_info["status"] == QueryStatus.FAILED.value: + return QueryResult( + success=False, + error=status_info.get("error", "Query failed"), + execution_time=status_info.get("execution_time", 0) + ) + + if status_info["status"] != QueryStatus.COMPLETED.value: + raise QueryExecutionError(f"Query {query_id} is not completed yet (status: {status_info['status']})") + + # Return the stored result + if "result" in status_info: + return QueryResult(**status_info["result"]) + else: + # Fallback for compatibility + execution_time = status_info.get("execution_time", 0) + return QueryResult( + success=True, + data=[], + 
row_count=0, + execution_time=execution_time + ) + + async def _get_container_id(self, session_id: str) -> Optional[str]: + """Get container ID for session""" + if self.cpg_generator: + container_id = await self.cpg_generator.get_container_id(session_id) + logger.debug(f"Got container ID from CPG generator for session {session_id}: {container_id}") + return container_id + container_id = self.session_containers.get(session_id) + logger.debug(f"Got container ID from local cache for session {session_id}: {container_id}") + return container_id + + async def _read_file_from_container(self, session_id: str, file_path: str) -> str: + """Read file content from Docker container""" + container_id = await self._get_container_id(session_id) + if not container_id: + raise QueryExecutionError(f"No container found for session {session_id}") + + try: + container = self.docker_client.containers.get(container_id) + result = container.exec_run(f"cat {file_path}") + + if result.exit_code == 0: + return result.output.decode('utf-8', errors='ignore') + else: + raise QueryExecutionError(f"Failed to read file {file_path}: exit code {result.exit_code}") + + except Exception as e: + raise QueryExecutionError(f"Failed to read file {file_path}: {str(e)}") + + async def execute_query( + self, + session_id: str, + cpg_path: str, + query: str, + timeout: Optional[int] = None + ) -> QueryResult: + """Execute a CPGQL query synchronously (for backwards compatibility)""" + start_time = time.time() + + try: + # Validate query + validate_cpgql_query(query) + + # Normalize query to ensure JSON output + query_normalized = self._normalize_query_for_json(query.strip()) + + # Check cache if enabled + if self.config.cache_enabled and self.redis: + query_hash_val = hash_query(query_normalized) + cached = await self.redis.get_cached_query(session_id, query_hash_val) + if cached: + logger.info(f"Query cache hit for session {session_id}") + cached['execution_time'] = time.time() - start_time + return QueryResult(**cached) + + # Use container CPG path consistently + container_cpg_path = "/workspace/cpg.bin" + + # Ensure CPG is loaded in session + await self._ensure_cpg_loaded(session_id, container_cpg_path) + + # Execute query + timeout_val = timeout or self.config.timeout + result = await self._execute_query_in_shell(session_id, query_normalized, timeout_val) + result.execution_time = time.time() - start_time + + # Cache result if enabled + if self.config.cache_enabled and self.redis and result.success: + query_hash_val = hash_query(query_normalized) + await self.redis.cache_query_result( + session_id, + query_hash_val, + result.to_dict(), + self.config.cache_ttl + ) + + logger.info( + f"Query executed for session {session_id}: " + f"{result.row_count} rows in {result.execution_time:.2f}s" + ) + + return result + + except QueryExecutionError as e: + logger.error(f"Query execution error: {e}") + return QueryResult( + success=False, + error=str(e), + execution_time=time.time() - start_time + ) + except Exception as e: + logger.error(f"Unexpected error executing query: {e}") + logger.exception(e) + return QueryResult( + success=False, + error=f"Query execution failed: {str(e)}", + execution_time=time.time() - start_time + ) + + async def list_queries(self, session_id: Optional[str] = None) -> Dict[str, Any]: + """List all queries or queries for a specific session""" + if session_id: + return { + query_id: status_info + for query_id, status_info in self.query_status.items() + if status_info["session_id"] == session_id + } + else: + return 
self.query_status.copy() + + async def cleanup_query(self, query_id: str): + """Clean up query resources""" + if query_id in self.query_status: + status_info = self.query_status[query_id] + + # Clean up output file if it exists + if "output_file" in status_info: + try: + session_id = status_info["session_id"] + output_file = status_info["output_file"] + + # Execute rm command in container to clean up file + container_id = await self._get_container_id(session_id) + if container_id: + container = self.docker_client.containers.get(container_id) + container.exec_run(f"rm -f {output_file}") + except Exception as e: + logger.warning(f"Failed to cleanup output file for query {query_id}: {e}") + + # Remove from tracking + del self.query_status[query_id] + logger.info(f"Cleaned up query {query_id}") + + async def cleanup_old_queries(self, max_age_seconds: int = 3600): + """Clean up old completed queries""" + current_time = time.time() + to_cleanup = [] + + for query_id, status_info in self.query_status.items(): + if status_info["status"] in [QueryStatus.COMPLETED.value, QueryStatus.FAILED.value]: + age = current_time - status_info.get("completed_at", status_info["created_at"]) + if age > max_age_seconds: + to_cleanup.append(query_id) + + for query_id in to_cleanup: + await self.cleanup_query(query_id) + + if to_cleanup: + logger.info(f"Cleaned up {len(to_cleanup)} old queries") + + def _normalize_query_for_json(self, query: str) -> str: + """Normalize query to ensure JSON output""" + # Remove any existing output modifiers + query = query.strip() + if query.endswith('.l'): + query = query[:-2] + elif query.endswith('.toList'): + query = query[:-7] + elif query.endswith('.toJson'): + query = query[:-7] + elif query.endswith('.toJsonPretty'): + query = query[:-13] + + # Add .toJsonPretty for proper JSON output + return query + '.toJsonPretty' + + async def _ensure_cpg_loaded(self, session_id: str, cpg_path: str): + """Ensure CPG is loaded in the Joern session""" + # Load CPG if not already loaded or if different CPG + current_cpg = self.session_cpgs.get(session_id) + if current_cpg != cpg_path: + await self._load_cpg_in_container(session_id, cpg_path) + self.session_cpgs[session_id] = cpg_path + + async def _load_cpg_in_container(self, session_id: str, cpg_path: str): + """Load CPG in the container using direct joern command""" + logger.info(f"Loading CPG for session {session_id}: {cpg_path}") + + container_id = await self._get_container_id(session_id) + if not container_id: + logger.error(f"No container found for session {session_id}") + raise QueryExecutionError(f"No container found for session {session_id}") + + logger.info(f"Loading CPG {cpg_path} in container {container_id} for session {session_id}") + + try: + # Start Joern shell and load CPG in one command + container = self.docker_client.containers.get(container_id) + joern_cmd = self._get_joern_command() + + # Create a simple script to load CPG + script_content = f'''#!/bin/bash +echo 'importCpg("{cpg_path}")' | {joern_cmd} +''' + + # Write script to container using a simpler approach + script_result = container.exec_run([ + "sh", "-c", + f"cat > /tmp/load_cpg.sh << 'EOF'\n{script_content}EOF\nchmod +x /tmp/load_cpg.sh" + ]) + + if script_result.exit_code != 0: + error_output = script_result.output.decode('utf-8', errors='ignore') if script_result.output else "No output" + logger.error(f"Failed to create CPG loading script: {error_output}") + raise QueryExecutionError(f"Failed to create CPG loading script: {error_output}") + + # Execute the 
script + load_result = container.exec_run(["/bin/bash", "/tmp/load_cpg.sh"]) + + if load_result.exit_code != 0: + error_msg = load_result.output.decode('utf-8', errors='ignore') if load_result.output else "No output" + logger.error(f"Failed to load CPG: {error_msg}") + raise QueryExecutionError(f"Failed to load CPG: {error_msg}") + + logger.info(f"CPG loaded successfully for session {session_id}") + + except Exception as e: + logger.error(f"Failed to load CPG in container: {e}") + raise QueryExecutionError(f"Failed to load CPG: {str(e)}") + + async def _execute_query_in_shell(self, session_id: str, query: str, timeout: int) -> QueryResult: + """Execute query in the container""" + logger.debug(f"Executing query in session {session_id}: {query[:100]}...") + + container_id = await self._get_container_id(session_id) + if not container_id: + raise QueryExecutionError(f"No container found for session {session_id}") + + try: + container = self.docker_client.containers.get(container_id) + + # Use the CPG file from workspace + cpg_path = "/workspace/cpg.bin" + + # Create unique output file for this query + output_file = f"/tmp/query_result_{session_id}_{int(time.time())}.json" + + # Create query with pipe to JSON file + query_with_pipe = f"{query} #> \"{output_file}\"" + + # Create a simple script that loads CPG and executes query + script_content = f'''#!/bin/bash +set -e + +# Check if CPG file exists +if [ ! -f "{cpg_path}" ]; then + echo "ERROR: CPG file not found at {cpg_path}" >&2 + exit 1 +fi + +# Execute joern with the query +echo '{query_with_pipe}' | timeout {timeout} joern {cpg_path} +''' + + # Write script to container + script_result = container.exec_run([ + "sh", "-c", + f"cat > /tmp/query.sh << 'SCRIPT_EOF'\n{script_content}SCRIPT_EOF\nchmod +x /tmp/query.sh" + ]) + + if script_result.exit_code != 0: + error_output = script_result.output.decode('utf-8', errors='ignore') if script_result.output else "No output" + logger.error(f"Failed to create query script: {error_output}") + raise QueryExecutionError(f"Failed to create query script: {error_output}") + + # Execute the script + loop = asyncio.get_event_loop() + + def _exec_sync(): + result = container.exec_run(["/bin/bash", "/tmp/query.sh"], workdir="/workspace") + return result + + exec_result = await loop.run_in_executor(None, _exec_sync) + + logger.debug(f"Query execution exit code: {exec_result.exit_code}") + + if exec_result.exit_code != 0: + output = exec_result.output.decode('utf-8', errors='ignore') if exec_result.output else "" + logger.error(f"Query execution failed with exit code {exec_result.exit_code}: {output}") + return QueryResult( + success=False, + error=f"Query execution failed: {output}" + ) + + # Read the JSON result file + try: + def _read_file(): + result = container.exec_run(f"cat {output_file}") + return result + + file_result = await loop.run_in_executor(None, _read_file) + + if file_result.exit_code != 0: + logger.warning(f"No output file created, query returned no results") + return QueryResult(success=True, data=[], row_count=0) + + json_content = file_result.output.decode('utf-8', errors='ignore') + + # Clean up the output file + container.exec_run(f"rm -f {output_file}") + + if not json_content.strip(): + return QueryResult(success=True, data=[], row_count=0) + + # Parse JSON content + try: + data = json.loads(json_content) + + # Normalize data to list + if isinstance(data, dict): + data = [data] + elif not isinstance(data, list): + data = [{"value": str(data)}] + + logger.info(f"Successfully parsed 
{len(data)} results from query") + + return QueryResult( + success=True, + data=data, + row_count=len(data) + ) + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON output: {e}") + logger.debug(f"Raw JSON content: {json_content[:500]}...") + + # Return as string value if JSON parsing fails + return QueryResult( + success=True, + data=[{"value": json_content.strip()}], + row_count=1 + ) + + except Exception as e: + logger.error(f"Failed to read query result file: {e}") + return QueryResult( + success=False, + error=f"Failed to read result: {str(e)}" + ) + + except Exception as e: + logger.error(f"Error executing query in container: {e}") + return QueryResult( + success=False, + error=f"Query execution error: {str(e)}" + ) + + async def close_session(self, session_id: str): + """Close query executor session resources""" + if session_id in self.session_cpgs: + del self.session_cpgs[session_id] + + # Remove from container mapping if present + if session_id in self.session_containers: + del self.session_containers[session_id] + + logger.info(f"Closed query executor resources for session {session_id}") + + async def cleanup(self): + """Cleanup all sessions and queries""" + # Cleanup all queries + query_ids = list(self.query_status.keys()) + for query_id in query_ids: + await self.cleanup_query(query_id) + + # Cleanup session resources + sessions = list(self.session_cpgs.keys()) + for session_id in sessions: + await self.close_session(session_id) \ No newline at end of file diff --git a/src/services/session_manager.py b/src/services/session_manager.py new file mode 100644 index 0000000..2352ad8 --- /dev/null +++ b/src/services/session_manager.py @@ -0,0 +1,212 @@ +""" +Session manager for CPG session lifecycle management +""" +import logging +import uuid +from datetime import datetime +from typing import Optional, List, Dict, Any + +from ..models import Session, SessionStatus, SessionConfig +from ..utils.redis_client import RedisClient +from ..exceptions import SessionNotFoundError, ResourceLimitError + +logger = logging.getLogger(__name__) + + +class SessionManager: + """Manages CPG session lifecycle and metadata""" + + def __init__(self, redis_client: RedisClient, config: SessionConfig): + self.redis = redis_client + self.config = config + self.docker_cleanup_callback = None # Will be set by the service that has Docker access + + def set_docker_cleanup_callback(self, callback): + """Set the callback function for Docker container cleanup""" + self.docker_cleanup_callback = callback + + async def create_session( + self, + source_type: str, + source_path: str, + language: str, + options: Dict[str, Any] + ) -> Session: + """Create a new CPG session""" + try: + # Check concurrent session limit and auto-cleanup if needed + active_sessions = await self.redis.list_sessions() + if len(active_sessions) >= self.config.max_concurrent: + logger.info(f"Session limit reached ({len(active_sessions)}/{self.config.max_concurrent}), cleaning up oldest sessions") + await self._cleanup_oldest_sessions(10) # Clean up 10 oldest sessions + + # Create session + session = Session( + id=str(uuid.uuid4()), + source_type=source_type, + source_path=source_path, + language=language, + status=SessionStatus.INITIALIZING.value, + metadata=options + ) + + # Save to Redis + await self.redis.save_session(session, self.config.ttl) + + logger.info(f"Created session {session.id}") + return session + + except Exception as e: + logger.error(f"Failed to create session: {e}") + raise + + async def get_session(self, 
session_id: str) -> Optional[Session]: + """Get session by ID""" + try: + session = await self.redis.get_session(session_id) + if not session: + return None + return session + except Exception as e: + logger.error(f"Failed to get session {session_id}: {e}") + return None + + async def update_session( + self, + session_id: str, + **updates + ): + """Update session fields""" + try: + # Update last_accessed + updates['last_accessed'] = datetime.utcnow() + await self.redis.update_session(session_id, updates, self.config.ttl) + logger.debug(f"Updated session {session_id}") + except Exception as e: + logger.error(f"Failed to update session {session_id}: {e}") + raise + + async def update_status( + self, + session_id: str, + status: str, + error_message: Optional[str] = None + ): + """Update session status""" + updates = { + 'status': status, + 'last_accessed': datetime.utcnow() + } + + if error_message: + updates['error_message'] = error_message + + await self.redis.update_session(session_id, updates, self.config.ttl) + logger.info(f"Session {session_id} status: {status}") + + async def list_sessions(self, filters: Optional[Dict[str, str]] = None) -> List[Session]: + """List all sessions with optional filtering""" + try: + session_ids = await self.redis.list_sessions() + sessions = [] + + for session_id in session_ids: + session = await self.get_session(session_id) + if session: + # Apply filters + if filters: + match = True + for key, value in filters.items(): + if getattr(session, key, None) != value: + match = False + break + if match: + sessions.append(session) + else: + sessions.append(session) + + return sessions + + except Exception as e: + logger.error(f"Failed to list sessions: {e}") + return [] + + async def touch_session(self, session_id: str): + """Refresh session TTL""" + try: + await self.redis.touch_session(session_id, self.config.ttl) + await self.update_session(session_id, last_accessed=datetime.utcnow()) + except Exception as e: + logger.error(f"Failed to touch session {session_id}: {e}") + + async def cleanup_session(self, session_id: str): + """Clean up session and associated resources""" + try: + session = await self.get_session(session_id) + if not session: + raise SessionNotFoundError(f"Session {session_id} not found") + + # Delete container mapping if exists + if session.container_id: + await self.redis.delete_container_mapping(session.container_id) + + # Delete session + await self.redis.delete_session(session_id) + + logger.info(f"Cleaned up session {session_id}") + + except SessionNotFoundError: + raise + except Exception as e: + logger.error(f"Failed to cleanup session {session_id}: {e}") + raise + + async def _cleanup_oldest_sessions(self, count: int = 10): + """Clean up the oldest sessions to make room for new ones""" + try: + # Get all sessions sorted by creation time (oldest first) + sessions = await self.list_sessions() + if not sessions: + return + + # Sort by created_at timestamp (oldest first) + sorted_sessions = sorted(sessions, key=lambda s: s.created_at) + + # Clean up the oldest 'count' sessions + sessions_to_cleanup = sorted_sessions[:count] + + logger.info(f"Auto-cleaning up {len(sessions_to_cleanup)} oldest sessions") + + for session in sessions_to_cleanup: + try: + # Stop Docker container if it exists and we have a cleanup callback + if session.container_id and self.docker_cleanup_callback: + await self.docker_cleanup_callback(session.container_id) + + # Clean up session data + await self.cleanup_session(session.id) + logger.info(f"Auto-cleaned up old 
session {session.id}") + except Exception as e: + logger.error(f"Failed to auto-cleanup session {session.id}: {e}") + + except Exception as e: + logger.error(f"Failed to cleanup oldest sessions: {e}") + + async def cleanup_idle_sessions(self): + """Clean up sessions that have been idle too long""" + try: + sessions = await self.list_sessions() + now = datetime.utcnow() + + for session in sessions: + idle_time = (now - session.last_accessed).total_seconds() + + if idle_time > self.config.idle_timeout: + logger.info( + f"Cleaning up idle session {session.id} " + f"(idle for {idle_time:.0f} seconds)" + ) + await self.cleanup_session(session.id) + + except Exception as e: + logger.error(f"Failed to cleanup idle sessions: {e}") diff --git a/src/tools/__init__.py b/src/tools/__init__.py new file mode 100644 index 0000000..3664cba --- /dev/null +++ b/src/tools/__init__.py @@ -0,0 +1,6 @@ +""" +Tools package +""" +from .mcp_tools import register_tools + +__all__ = ['register_tools'] diff --git a/src/tools/mcp_tools.py b/src/tools/mcp_tools.py new file mode 100644 index 0000000..e048e1d --- /dev/null +++ b/src/tools/mcp_tools.py @@ -0,0 +1,1152 @@ +""" +MCP Tool Definitions for Joern MCP Server +""" +import asyncio +import logging +import os +import hashlib +import time +from datetime import datetime +from typing import Optional, Dict, Any + +from ..models import SessionStatus +from ..exceptions import ( + SessionNotFoundError, + SessionNotReadyError, + ValidationError, + ResourceLimitError, + QueryExecutionError +) +from ..utils.validators import ( + validate_source_type, + validate_language, + validate_session_id, + validate_github_url, + validate_local_path, + validate_cpgql_query +) + +logger = logging.getLogger(__name__) + + +def get_cpg_cache_path(source_path: str, language: str, playground_path: str) -> str: + """ + Generate a deterministic CPG cache path based on source path and language. + Uses SHA256 hash of the source path to create unique but reproducible filename. + """ + # Create a unique identifier from source path and language + identifier = f"{source_path}:{language}" + hash_digest = hashlib.sha256(identifier.encode()).hexdigest()[:16] + + # Create CPG filename + cpg_filename = f"cpg_{hash_digest}_{language}.bin" + cpg_cache_path = os.path.join(playground_path, "cpgs", cpg_filename) + + return cpg_cache_path + + +def register_tools(mcp, services: dict): + """Register all MCP tools with the FastMCP server""" + + @mcp.tool() + async def create_cpg_session( + source_type: str, + source_path: str, + language: str, + github_token: Optional[str] = None, + branch: Optional[str] = None + ) -> Dict[str, Any]: + """ + Creates a new CPG analysis session. + + This tool initiates CPG generation for a codebase. For GitHub repositories, + it clones the repo first. For local paths, it uses the existing directory. + The CPG generation happens asynchronously in a Docker container. 
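+        Poll get_session_status with the returned session_id until it reports
+        "ready" before running queries; sample_client.py demonstrates this polling loop.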
+ + Args: + source_type: Either "local" or "github" + source_path: For local: absolute path to source directory + For github: full GitHub URL (e.g., https://github.com/user/repo) + language: Programming language - one of: java, c, cpp, javascript, python, go, kotlin + github_token: GitHub Personal Access Token for private repositories (optional) + branch: Specific git branch to checkout (optional, defaults to default branch) + + Returns: + { + "session_id": "unique-session-id", + "status": "initializing" | "generating", + "message": "CPG generation started", + "estimated_time": "2-5 minutes" + } + + Examples: + # GitHub repository + create_cpg_session( + source_type="github", + source_path="https://github.com/joernio/sample-repo", + language="java" + ) + + # Local directory + create_cpg_session( + source_type="local", + source_path="/home/user/projects/myapp", + language="python" + ) + """ + try: + # Validate inputs + validate_source_type(source_type) + validate_language(language) + + session_manager = services['session_manager'] + git_manager = services['git_manager'] + docker_orch = services['docker'] + cpg_generator = services['cpg_generator'] + storage_config = services['config'].storage + + # Create session + session = await session_manager.create_session( + source_type=source_type, + source_path=source_path, + language=language, + options={ + 'github_token': github_token, + 'branch': branch + } + ) + + # Handle source preparation + workspace_path = os.path.join(storage_config.workspace_root, "repos", session.id) + + # Get playground path (absolute) + playground_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'playground')) + + if source_type == "github": + validate_github_url(source_path) + # Clone to playground/codebases instead + target_path = os.path.join(playground_path, "codebases", session.id) + os.makedirs(target_path, exist_ok=True) + + await git_manager.clone_repository( + repo_url=source_path, + target_path=target_path, + branch=branch, + token=github_token + ) + # Path inside container + container_source_path = f"/playground/codebases/{session.id}" + else: + # For local paths, check if it's relative to playground/codebases + if source_path.startswith("playground/codebases/") or "/playground/codebases/" in source_path: + # Already in playground, use directly + if not os.path.isabs(source_path): + source_path = os.path.abspath(source_path) + + if not os.path.exists(source_path): + raise ValidationError(f"Path does not exist: {source_path}") + if not os.path.isdir(source_path): + raise ValidationError(f"Path is not a directory: {source_path}") + + # Get relative path from playground root + rel_path = os.path.relpath(source_path, playground_path) + container_source_path = f"/playground/{rel_path}" + + logger.info(f"Using local source from playground: {source_path} -> {container_source_path}") + else: + # Copy to playground/codebases + import shutil + + # Validate the path exists on the host system + if not os.path.isabs(source_path): + raise ValidationError("Local path must be absolute or relative to playground/codebases") + + # Detect if we're running in a container + in_container = ( + os.path.exists("/.dockerenv") or + os.path.exists("/run/.containerenv") or + os.path.exists("/host/home/") + ) + + container_check_path = source_path + if in_container and source_path.startswith("/home/"): + container_check_path = source_path.replace("/home/", "/host/home/", 1) + logger.info(f"Running in container, translated path: {source_path} -> 
{container_check_path}") + + if not os.path.exists(container_check_path): + raise ValidationError(f"Path does not exist: {source_path}") + if not os.path.isdir(container_check_path): + raise ValidationError(f"Path is not a directory: {source_path}") + + # Copy to playground/codebases + target_path = os.path.join(playground_path, "codebases", session.id) + os.makedirs(target_path, exist_ok=True) + + logger.info(f"Copying local source from {container_check_path} to {target_path}") + + for item in os.listdir(container_check_path): + src_item = os.path.join(container_check_path, item) + dst_item = os.path.join(target_path, item) + + if os.path.isdir(src_item): + shutil.copytree(src_item, dst_item, dirs_exist_ok=True) + else: + shutil.copy2(src_item, dst_item) + + container_source_path = f"/playground/codebases/{session.id}" + + # Create workspace directory for CPG storage + os.makedirs(workspace_path, exist_ok=True) + + # Ensure playground/cpgs directory exists + cpgs_dir = os.path.join(playground_path, "cpgs") + os.makedirs(cpgs_dir, exist_ok=True) + + # Check if CPG already exists in cache + cpg_cache_path = get_cpg_cache_path(source_path, language, playground_path) + cpg_exists = os.path.exists(cpg_cache_path) + + if cpg_exists: + logger.info(f"Found existing CPG in cache: {cpg_cache_path}") + # Copy cached CPG to workspace + import shutil + cpg_path = os.path.join(workspace_path, "cpg.bin") + shutil.copy2(cpg_cache_path, cpg_path) + + # Start Docker container with playground mount + container_id = await docker_orch.start_container( + session_id=session.id, + workspace_path=workspace_path, + playground_path=playground_path + ) + + # Register container with CPG generator + cpg_generator.register_session_container(session.id, container_id) + + # Update session as ready immediately + await session_manager.update_session( + session_id=session.id, + container_id=container_id, + status=SessionStatus.READY.value, + cpg_path=cpg_path + ) + + # Map container to session + redis_client = services['redis'] + await redis_client.set_container_mapping( + container_id, + session.id, + services['config'].sessions.ttl + ) + + return { + "session_id": session.id, + "status": SessionStatus.READY.value, + "message": "Loaded existing CPG from cache", + "cached": True + } + else: + logger.info(f"No cached CPG found, will generate new one") + + # Start Docker container with playground mount + container_id = await docker_orch.start_container( + session_id=session.id, + workspace_path=workspace_path, + playground_path=playground_path + ) + + # Register container with CPG generator + cpg_generator.register_session_container(session.id, container_id) + + # Update session with container ID + await session_manager.update_session( + session_id=session.id, + container_id=container_id, + status=SessionStatus.GENERATING.value + ) + + # Map container to session + redis_client = services['redis'] + await redis_client.set_container_mapping( + container_id, + session.id, + services['config'].sessions.ttl + ) + + # Start async CPG generation + cpg_path = os.path.join(workspace_path, "cpg.bin") + + # Create a task that will also cache the CPG after generation + async def generate_and_cache(): + await cpg_generator.generate_cpg( + session_id=session.id, + source_path=container_source_path, + language=language + ) + # Cache the CPG after successful generation + if os.path.exists(cpg_path): + import shutil + shutil.copy2(cpg_path, cpg_cache_path) + logger.info(f"Cached CPG to: {cpg_cache_path}") + + 
asyncio.create_task(generate_and_cache()) + + return { + "session_id": session.id, + "status": SessionStatus.GENERATING.value, + "message": "CPG generation started", + "estimated_time": "2-5 minutes", + "cached": False + } + + except ValidationError as e: + logger.error(f"Validation error: {e}") + return { + "success": False, + "error": { + "code": "VALIDATION_ERROR", + "message": str(e) + } + } + except ResourceLimitError as e: + logger.error(f"Resource limit error: {e}") + return { + "success": False, + "error": { + "code": "RESOURCE_LIMIT_EXCEEDED", + "message": str(e) + } + } + except Exception as e: + logger.error(f"Failed to create session: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": "Failed to create session", + "details": str(e) + } + } + + @mcp.tool() + async def run_cpgql_query_async( + session_id: str, + query: str, + timeout: int = 30 + ) -> Dict[str, Any]: + """ + Executes a CPGQL query asynchronously and returns a query ID for status tracking. + + This tool starts a CPGQL query execution and returns immediately with a query ID. + Use get_query_status to check progress and get_query_result to retrieve results. + Results are automatically saved to JSON files in the container. + + Args: + session_id: The session ID returned from create_cpg_session + query: CPGQL query string (automatically converted to JSON output) + timeout: Maximum execution time in seconds (default: 30) + + Returns: + { + "success": true, + "query_id": "query-uuid-123", + "status": "pending", + "message": "Query started successfully" + } + """ + try: + # Validate inputs + validate_session_id(session_id) + validate_cpgql_query(query) + + session_manager = services['session_manager'] + query_executor = services['query_executor'] + + # Get and validate session + session = await session_manager.get_session(session_id) + if not session: + raise SessionNotFoundError(f"Session {session_id} not found") + + if session.status != SessionStatus.READY.value: + raise SessionNotReadyError( + f"Session is in '{session.status}' status. " + f"Wait for CPG generation to complete." + ) + + # Update last accessed time + await session_manager.touch_session(session_id) + + # Start async query execution + query_id = await query_executor.execute_query_async( + session_id=session_id, + cpg_path=session.cpg_path, + query=query, + timeout=timeout + ) + + return { + "success": True, + "query_id": query_id, + "status": "pending", + "message": "Query started successfully" + } + + except SessionNotFoundError as e: + logger.error(f"Session not found: {e}") + return { + "success": False, + "error": { + "code": "SESSION_NOT_FOUND", + "message": str(e) + } + } + except SessionNotReadyError as e: + logger.warning(f"Session not ready: {e}") + return { + "success": False, + "error": { + "code": "SESSION_NOT_READY", + "message": str(e) + } + } + except Exception as e: + logger.error(f"Unexpected error: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": "Failed to start query", + "details": str(e) + } + } + + @mcp.tool() + async def get_query_status(query_id: str) -> Dict[str, Any]: + """ + Get the status of an asynchronously running query. + + Check if a query started with run_cpgql_query_async is still running, + completed, or failed. Provides execution time and error information. 
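+        Typical flow (as described for run_cpgql_query_async): start a query with
+        run_cpgql_query_async, poll this tool until the status is "completed" (or
+        "failed"), then retrieve the results with get_query_result.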
+ + Args: + query_id: The query ID returned from run_cpgql_query_async + + Returns: + { + "query_id": "query-uuid-123", + "status": "running" | "completed" | "failed" | "pending", + "session_id": "session-123", + "query": "cpg.method.name.toJson", + "created_at": 1697524800.0, + "execution_time": 1.23, + "error": null + } + """ + try: + query_executor = services['query_executor'] + + status_info = await query_executor.get_query_status(query_id) + + return { + "success": True, + **status_info + } + + except QueryExecutionError as e: + logger.error(f"Query status error: {e}") + return { + "success": False, + "error": { + "code": "QUERY_NOT_FOUND", + "message": str(e) + } + } + except Exception as e: + logger.error(f"Unexpected error: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": str(e) + } + } + + @mcp.tool() + async def get_query_result(query_id: str) -> Dict[str, Any]: + """ + Get the result of a completed query. + + Retrieve the JSON results from a query that has completed execution. + The query must be in "completed" status to retrieve results. + + Args: + query_id: The query ID returned from run_cpgql_query_async + + Returns: + { + "success": true, + "data": [ + {"property1": "value1", "property2": "value2"}, + ... + ], + "row_count": 10, + "execution_time": 1.23 + } + """ + try: + query_executor = services['query_executor'] + + result = await query_executor.get_query_result(query_id) + + return { + "success": result.success, + "data": result.data, + "row_count": result.row_count, + "execution_time": result.execution_time, + "error": result.error + } + + except QueryExecutionError as e: + logger.error(f"Query result error: {e}") + return { + "success": False, + "error": { + "code": "QUERY_ERROR", + "message": str(e) + } + } + except Exception as e: + logger.error(f"Unexpected error: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": str(e) + } + } + + @mcp.tool() + async def list_queries(session_id: Optional[str] = None) -> Dict[str, Any]: + """ + List default CPGQL queries for code analysis and vulnerability detection. + + Returns a comprehensive list of predefined queries for common code analysis tasks, + including generic browsing, security vulnerability detection, and memory safety checks. + + Args: + session_id: Filter queries by session ID (optional, for backward compatibility) + + Returns: + { + "success": true, + "queries": [ + { + "name": "All Methods", + "description": "List all method/function names in the codebase", + "query": "cpg.method.name.l", + "category": "browsing" + }, + ... 
+ ], + "total": 15, + "categories": ["browsing", "security", "memory", "structure"] + } + """ + # Default CPGQL queries for various analysis tasks + default_queries = [ + { + "name": "All Methods", + "description": "List all method/function names in the codebase", + "query": "cpg.method.name.l", + "category": "browsing" + }, + { + "name": "All Function Calls", + "description": "List all function call names in the codebase", + "query": "cpg.call.name.l", + "category": "browsing" + }, + { + "name": "All Classes/Types", + "description": "List all class and type declarations", + "query": "cpg.typeDecl.name.l", + "category": "structure" + }, + { + "name": "All Files", + "description": "List all source files in the codebase", + "query": "cpg.file.name.l", + "category": "browsing" + }, + { + "name": "All Imports", + "description": "List all import statements", + "query": "cpg.import.code.l", + "category": "structure" + }, + { + "name": "All String Literals", + "description": "List all string literals in the code", + "query": "cpg.literal.code.l", + "category": "browsing" + }, + { + "name": "SQL Injection Vulnerabilities", + "description": "Find potential SQL injection vulnerabilities", + "query": "cpg.call.name(\".*execute.*\").argument.code(\".*\\\\+.*\").l", + "category": "security" + }, + { + "name": "Command Injection", + "description": "Find potential command injection vulnerabilities", + "query": "cpg.call.name(\"system|exec|popen\").argument.code(\".*\\\\+.*\").l", + "category": "security" + }, + { + "name": "Hardcoded Secrets", + "description": "Find hardcoded passwords, secrets, and tokens", + "query": "cpg.literal.code(\".*(password|secret|key|token|credential).*\").l", + "category": "security" + }, + { + "name": "Path Traversal", + "description": "Find potential path traversal vulnerabilities", + "query": "cpg.literal.code(\".*\\\\.\\\\..*\").l", + "category": "security" + }, + { + "name": "Buffer Overflow Risks", + "description": "Find unsafe buffer operations that may cause overflows", + "query": "cpg.call.name(\"strcpy|strcat|strncpy|strncat|gets|sprintf|vsprintf\").l", + "category": "memory" + }, + { + "name": "Memory Allocation", + "description": "Find all memory allocation calls", + "query": "cpg.call.name(\"malloc|calloc|realloc|new\").l", + "category": "memory" + }, + { + "name": "Memory Deallocation", + "description": "Find all memory deallocation calls", + "query": "cpg.call.name(\"free|delete\").l", + "category": "memory" + }, + { + "name": "Null Pointer Dereferences", + "description": "Find potential null pointer dereferences", + "query": "cpg.call.argument.isNull.l", + "category": "memory" + }, + { + "name": "Uninitialized Variables", + "description": "Find potentially uninitialized local variables", + "query": "cpg.local.whereNot(_.definingBlock).l", + "category": "memory" + }, + { + "name": "Control Structures", + "description": "List all control structures (if, while, for, etc.)", + "query": "cpg.controlStructure.code.l", + "category": "structure" + }, + { + "name": "Return Statements", + "description": "List all return statements", + "query": "cpg.return.code.l", + "category": "structure" + }, + { + "name": "XSS Vulnerabilities", + "description": "Find potential cross-site scripting vulnerabilities", + "query": "cpg.call.name(\".*write.*\").argument.code(\".*(request|param).*\").l", + "category": "security" + }, + { + "name": "All Parameters", + "description": "List all function parameters", + "query": "cpg.parameter.name.l", + "category": "structure" + }, + { + 
"name": "All Local Variables", + "description": "List all local variables", + "query": "cpg.local.name.l", + "category": "structure" + } + ] + + # Extract unique categories + categories = list(set(q["category"] for q in default_queries)) + + # For backward compatibility, if session_id is provided, also include running queries + if session_id: + try: + query_executor = services['query_executor'] + running_queries = await query_executor.list_queries(session_id) + + return { + "success": True, + "default_queries": default_queries, + "running_queries": running_queries, + "total_default": len(default_queries), + "total_running": len(running_queries), + "categories": categories + } + except Exception as e: + logger.warning(f"Could not fetch running queries: {e}") + + return { + "success": True, + "queries": default_queries, + "total": len(default_queries), + "categories": categories + } + + @mcp.tool() + async def cleanup_queries( + session_id: Optional[str] = None, + max_age_hours: int = 1 + ) -> Dict[str, Any]: + """ + Clean up old completed queries to free resources. + + Remove old query results and temporary files from completed or failed queries. + Helps maintain system performance by cleaning up accumulated query data. + + Args: + session_id: Only cleanup queries for specific session (optional) + max_age_hours: Remove queries older than this many hours (default: 1) + + Returns: + { + "success": true, + "cleaned_up": 3, + "message": "Cleaned up 3 old queries" + } + """ + try: + query_executor = services['query_executor'] + + max_age_seconds = max_age_hours * 3600 + + if session_id: + # Get queries for specific session + queries = await query_executor.list_queries(session_id) + cleaned_count = 0 + + for query_id, query_info in queries.items(): + if query_info["status"] in ["completed", "failed"]: + age = time.time() - query_info.get("completed_at", query_info["created_at"]) + if age > max_age_seconds: + await query_executor.cleanup_query(query_id) + cleaned_count += 1 + else: + # Cleanup all old queries + await query_executor.cleanup_old_queries(max_age_seconds) + # We don't have an exact count for this method + cleaned_count = "multiple" + + return { + "success": True, + "cleaned_up": cleaned_count, + "message": f"Cleaned up {cleaned_count} old queries" + } + + except Exception as e: + logger.error(f"Unexpected error: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": str(e) + } + } + + @mcp.tool() + async def run_cpgql_query( + session_id: str, + query: str, + timeout: int = 30 + ) -> Dict[str, Any]: + """ + Executes a CPGQL query synchronously on a loaded CPG. + + This tool runs CPGQL queries against the Code Property Graph and waits + for completion before returning results. For long-running queries, + consider using run_cpgql_query_async instead. + + Args: + session_id: The session ID returned from create_cpg_session + query: CPGQL query string (automatically converted to JSON output) + timeout: Maximum execution time in seconds (default: 30) + + Returns: + { + "success": true, + "data": [ + {"property1": "value1", "property2": "value2"}, + ... 
+ ], + "row_count": 10, + "execution_time": 1.23 + } + """ + try: + # Validate inputs + validate_session_id(session_id) + validate_cpgql_query(query) + + session_manager = services['session_manager'] + query_executor = services['query_executor'] + + # Get and validate session + session = await session_manager.get_session(session_id) + if not session: + raise SessionNotFoundError(f"Session {session_id} not found") + + if session.status != SessionStatus.READY.value: + raise SessionNotReadyError( + f"Session is in '{session.status}' status. " + f"Wait for CPG generation to complete." + ) + + # Update last accessed time + await session_manager.touch_session(session_id) + + # Execute query synchronously + # Use container path for CPG instead of host path + container_cpg_path = "/workspace/cpg.bin" + result = await query_executor.execute_query( + session_id=session_id, + cpg_path=container_cpg_path, + query=query, + timeout=timeout + ) + + return { + "success": result.success, + "data": result.data, + "row_count": result.row_count, + "execution_time": result.execution_time, + "error": result.error + } + + except SessionNotFoundError as e: + logger.error(f"Session not found: {e}") + return { + "success": False, + "error": { + "code": "SESSION_NOT_FOUND", + "message": str(e) + } + } + except SessionNotReadyError as e: + logger.warning(f"Session not ready: {e}") + return { + "success": False, + "error": { + "code": "SESSION_NOT_READY", + "message": str(e) + } + } + except QueryExecutionError as e: + logger.error(f"Query execution error: {e}") + return { + "success": False, + "error": { + "code": "QUERY_EXECUTION_ERROR", + "message": str(e) + } + } + except Exception as e: + logger.error(f"Unexpected error: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": "Query execution failed", + "details": str(e) + } + } + + @mcp.tool() + async def get_session_status(session_id: str) -> Dict[str, Any]: + """ + Gets the current status of a CPG session. + + Use this tool to check if CPG generation is complete and the session + is ready for queries. Also provides metadata about the session. 
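+        Poll this after create_cpg_session until the status becomes "ready"
+        before running queries with run_cpgql_query or run_cpgql_query_async.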
+ + Args: + session_id: The session ID to query + + Returns: + { + "session_id": "abc-123", + "status": "ready" | "generating" | "error" | "initializing", + "source_type": "github" | "local", + "source_path": "https://github.com/user/repo", + "language": "java", + "created_at": "2025-10-07T10:00:00Z", + "last_accessed": "2025-10-07T10:05:00Z", + "cpg_size": "125MB", + "error_message": null + } + """ + try: + validate_session_id(session_id) + + session_manager = services['session_manager'] + session = await session_manager.get_session(session_id) + + if not session: + raise SessionNotFoundError(f"Session {session_id} not found") + + # Get CPG file size if available + cpg_size = None + if session.cpg_path and os.path.exists(session.cpg_path): + size_bytes = os.path.getsize(session.cpg_path) + cpg_size = f"{size_bytes / (1024*1024):.2f}MB" + + return { + "session_id": session.id, + "status": session.status, + "source_type": session.source_type, + "source_path": session.source_path, + "language": session.language, + "created_at": session.created_at.isoformat(), + "last_accessed": session.last_accessed.isoformat(), + "cpg_size": cpg_size, + "error_message": session.error_message + } + + except SessionNotFoundError as e: + logger.error(f"Session not found: {e}") + return { + "success": False, + "error": { + "code": "SESSION_NOT_FOUND", + "message": str(e) + } + } + except Exception as e: + logger.error(f"Error getting session status: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": str(e) + } + } + + @mcp.tool() + async def list_sessions( + status: Optional[str] = None, + source_type: Optional[str] = None + ) -> Dict[str, Any]: + """ + Lists all active CPG sessions with optional filtering. + + Args: + status: Filter by status (optional): "ready", "generating", "error", "initializing" + source_type: Filter by source type (optional): "local", "github" + + Returns: + { + "sessions": [ + { + "session_id": "abc-123", + "status": "ready", + "source_path": "https://github.com/user/repo", + "language": "java", + "created_at": "2025-10-07T10:00:00Z" + }, + ... + ], + "total": 5 + } + """ + try: + session_manager = services['session_manager'] + + filters = {} + if status: + filters['status'] = status + if source_type: + filters['source_type'] = source_type + + sessions = await session_manager.list_sessions(filters) + + return { + "sessions": [ + { + "session_id": s.id, + "status": s.status, + "source_type": s.source_type, + "source_path": s.source_path, + "language": s.language, + "created_at": s.created_at.isoformat(), + "last_accessed": s.last_accessed.isoformat() + } + for s in sessions + ], + "total": len(sessions) + } + + except Exception as e: + logger.error(f"Error listing sessions: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": str(e) + } + } + + @mcp.tool() + async def close_session(session_id: str) -> Dict[str, Any]: + """ + Closes a CPG session and cleans up resources. + + This stops the Docker container, removes temporary files, and frees + up resources. Sessions are also automatically cleaned up after being + idle for 30 minutes. 
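+        To remove many idle or old sessions in one call, use
+        cleanup_all_sessions instead.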
+ + Args: + session_id: The session ID to close + + Returns: + { + "success": true, + "message": "Session closed successfully" + } + """ + try: + validate_session_id(session_id) + + session_manager = services['session_manager'] + docker_orch = services['docker'] + + session = await session_manager.get_session(session_id) + if not session: + raise SessionNotFoundError(f"Session {session_id} not found") + + # Stop container + if session.container_id: + await docker_orch.stop_container(session.container_id) + + # Cleanup session + await session_manager.cleanup_session(session_id) + + return { + "success": True, + "message": "Session closed successfully" + } + + except SessionNotFoundError as e: + logger.error(f"Session not found: {e}") + return { + "success": False, + "error": { + "code": "SESSION_NOT_FOUND", + "message": str(e) + } + } + except Exception as e: + logger.error(f"Error closing session: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": str(e) + } + } + + @mcp.tool() + async def cleanup_all_sessions( + max_age_hours: Optional[int] = None, + force: bool = False + ) -> Dict[str, Any]: + """ + Cleanup multiple sessions and their containers. + + This tool helps maintain the system by cleaning up old or inactive sessions. + Use with caution as it will stop containers and remove session data. + + Args: + max_age_hours: Only cleanup sessions older than this many hours (optional) + force: If true, cleanup all sessions regardless of age (default: False) + + Returns: + { + "success": true, + "cleaned_up": 5, + "session_ids": ["id1", "id2", ...], + "message": "Cleaned up 5 sessions" + } + """ + try: + session_manager = services['session_manager'] + docker_orch = services['docker'] + + # Get all sessions + all_sessions = await session_manager.list_sessions({}) + + sessions_to_cleanup = [] + + for session in all_sessions: + should_cleanup = False + + if force: + should_cleanup = True + elif max_age_hours: + age = datetime.utcnow() - session.last_accessed + if age.total_seconds() / 3600 > max_age_hours: + should_cleanup = True + + if should_cleanup: + sessions_to_cleanup.append(session) + + cleaned_session_ids = [] + errors = [] + + for session in sessions_to_cleanup: + try: + # Stop container + if session.container_id: + await docker_orch.stop_container(session.container_id) + + # Cleanup session + await session_manager.cleanup_session(session.id) + cleaned_session_ids.append(session.id) + logger.info(f"Cleaned up session: {session.id}") + + except Exception as e: + error_msg = f"Failed to cleanup session {session.id}: {str(e)}" + logger.error(error_msg) + errors.append(error_msg) + + result = { + "success": True, + "cleaned_up": len(cleaned_session_ids), + "session_ids": cleaned_session_ids, + "message": f"Cleaned up {len(cleaned_session_ids)} sessions" + } + + if errors: + result["errors"] = errors + result["message"] += f" ({len(errors)} errors)" + + return result + + except Exception as e: + logger.error(f"Error during cleanup: {e}", exc_info=True) + return { + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": str(e) + } + } + diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..befefdd --- /dev/null +++ b/src/utils/__init__.py @@ -0,0 +1,31 @@ +""" +Utilities package +""" +from .redis_client import RedisClient +from .validators import ( + validate_source_type, + validate_language, + validate_session_id, + validate_github_url, + validate_local_path, + 
validate_cpgql_query, + hash_query, + sanitize_path, + validate_timeout +) +from .logging import setup_logging, get_logger + +__all__ = [ + 'RedisClient', + 'validate_source_type', + 'validate_language', + 'validate_session_id', + 'validate_github_url', + 'validate_local_path', + 'validate_cpgql_query', + 'hash_query', + 'sanitize_path', + 'validate_timeout', + 'setup_logging', + 'get_logger' +] diff --git a/src/utils/logging.py b/src/utils/logging.py new file mode 100644 index 0000000..789bbb9 --- /dev/null +++ b/src/utils/logging.py @@ -0,0 +1,41 @@ +""" +Logging configuration +""" +import logging +import sys +from typing import Optional + + +def setup_logging(log_level: str = "INFO"): + """Setup structured logging""" + level = getattr(logging, log_level.upper(), logging.INFO) + + # Create formatter + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S' + ) + + # Setup root logger + root_logger = logging.getLogger() + root_logger.setLevel(level) + + # Remove existing handlers + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Console handler + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(level) + console_handler.setFormatter(formatter) + root_logger.addHandler(console_handler) + + # Reduce noise from libraries + logging.getLogger("docker").setLevel(logging.WARNING) + logging.getLogger("urllib3").setLevel(logging.WARNING) + logging.getLogger("git").setLevel(logging.WARNING) + + +def get_logger(name: str) -> logging.Logger: + """Get logger instance""" + return logging.getLogger(name) diff --git a/src/utils/redis_client.py b/src/utils/redis_client.py new file mode 100644 index 0000000..3c1f5ed --- /dev/null +++ b/src/utils/redis_client.py @@ -0,0 +1,109 @@ +""" +Redis client wrapper for session management +""" +import json +import logging +from typing import Optional, Dict, Any, List +import redis.asyncio as aioredis + +from ..models import RedisConfig, Session + +logger = logging.getLogger(__name__) + + +class RedisClient: + """Async Redis client for session storage""" + + def __init__(self, config: RedisConfig): + self.config = config + self.client: Optional[aioredis.Redis] = None + + async def connect(self): + """Establish Redis connection""" + try: + self.client = await aioredis.from_url( + f"redis://{self.config.host}:{self.config.port}/{self.config.db}", + password=self.config.password, + decode_responses=self.config.decode_responses + ) + await self.client.ping() + logger.info("Connected to Redis successfully") + except Exception as e: + logger.error(f"Failed to connect to Redis: {e}") + raise + + async def close(self): + """Close Redis connection""" + if self.client: + await self.client.close() + logger.info("Closed Redis connection") + + async def save_session(self, session: Session, ttl: int = 3600): + """Save session to Redis""" + key = f"session:{session.id}" + data = json.dumps(session.to_dict()) + await self.client.set(key, data, ex=ttl) + await self.client.sadd("sessions:active", session.id) + logger.debug(f"Saved session {session.id}") + + async def get_session(self, session_id: str) -> Optional[Session]: + """Retrieve session from Redis""" + key = f"session:{session_id}" + data = await self.client.get(key) + if data: + session_dict = json.loads(data) + return Session.from_dict(session_dict) + return None + + async def update_session(self, session_id: str, updates: Dict[str, Any], ttl: int = 3600): + """Update session fields""" + session = 
await self.get_session(session_id) + if session: + for key, value in updates.items(): + setattr(session, key, value) + await self.save_session(session, ttl) + + async def delete_session(self, session_id: str): + """Delete session from Redis""" + key = f"session:{session_id}" + await self.client.delete(key) + await self.client.srem("sessions:active", session_id) + logger.debug(f"Deleted session {session_id}") + + async def list_sessions(self) -> List[str]: + """List all active session IDs""" + return list(await self.client.smembers("sessions:active")) + + async def touch_session(self, session_id: str, ttl: int = 3600): + """Refresh session TTL""" + key = f"session:{session_id}" + await self.client.expire(key, ttl) + + async def set_container_mapping(self, container_id: str, session_id: str, ttl: int = 3600): + """Map container ID to session ID""" + key = f"container:{container_id}" + await self.client.set(key, session_id, ex=ttl) + + async def get_session_by_container(self, container_id: str) -> Optional[str]: + """Get session ID by container ID""" + key = f"container:{container_id}" + return await self.client.get(key) + + async def delete_container_mapping(self, container_id: str): + """Delete container mapping""" + key = f"container:{container_id}" + await self.client.delete(key) + + async def cache_query_result(self, session_id: str, query_hash: str, result: Dict[str, Any], ttl: int = 300): + """Cache query result""" + key = f"query:{session_id}:{query_hash}" + data = json.dumps(result) + await self.client.set(key, data, ex=ttl) + + async def get_cached_query(self, session_id: str, query_hash: str) -> Optional[Dict[str, Any]]: + """Get cached query result""" + key = f"query:{session_id}:{query_hash}" + data = await self.client.get(key) + if data: + return json.loads(data) + return None diff --git a/src/utils/validators.py b/src/utils/validators.py new file mode 100644 index 0000000..0a5fa51 --- /dev/null +++ b/src/utils/validators.py @@ -0,0 +1,113 @@ +""" +Input validation utilities +""" +import re +import hashlib +from urllib.parse import urlparse +from typing import Optional + +from ..exceptions import ValidationError +from ..models import SourceType + + +def validate_source_type(source_type: str): + """Validate source type""" + valid_types = [e.value for e in SourceType] + if source_type not in valid_types: + raise ValidationError( + f"Invalid source_type '{source_type}'. Must be one of: {', '.join(valid_types)}" + ) + + +def validate_language(language: str): + """Validate programming language""" + supported = ["java", "c", "cpp", "javascript", "python", "go", "kotlin"] + if language not in supported: + raise ValidationError( + f"Unsupported language '{language}'. 
Supported: {', '.join(supported)}" + ) + + +def validate_session_id(session_id: str): + """Validate session ID format""" + if not session_id or not isinstance(session_id, str): + raise ValidationError("session_id must be a non-empty string") + + # UUID pattern + uuid_pattern = r'^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$' + if not re.match(uuid_pattern, session_id): + raise ValidationError("session_id must be a valid UUID") + + +def validate_github_url(url: str) -> bool: + """Validate GitHub URL format""" + try: + parsed = urlparse(url) + if parsed.netloc not in ["github.com", "www.github.com"]: + raise ValidationError("Only GitHub URLs are supported") + + # Check for valid path format: /owner/repo + parts = parsed.path.strip('/').split('/') + if len(parts) < 2: + raise ValidationError("Invalid GitHub URL format. Expected: https://github.com/owner/repo") + + return True + except Exception as e: + raise ValidationError(f"Invalid GitHub URL: {str(e)}") + + +def validate_local_path(path: str) -> bool: + """Validate local file path""" + import os + if not os.path.isabs(path): + raise ValidationError("Local path must be absolute") + + if not os.path.exists(path): + raise ValidationError(f"Path does not exist: {path}") + + if not os.path.isdir(path): + raise ValidationError(f"Path is not a directory: {path}") + + return True + + +def validate_cpgql_query(query: str): + """Validate CPGQL query""" + if not query or not isinstance(query, str): + raise ValidationError("Query must be a non-empty string") + + if len(query) > 10000: + raise ValidationError("Query too long (max 10000 characters)") + + # Basic safety checks + dangerous_patterns = [ + r'System\.exit', + r'Runtime\.getRuntime', + r'ProcessBuilder', + r'java\.io\.File.*delete', + ] + + for pattern in dangerous_patterns: + if re.search(pattern, query, re.IGNORECASE): + raise ValidationError(f"Query contains potentially dangerous operation: {pattern}") + + +def hash_query(query: str) -> str: + """Generate hash for query caching""" + return hashlib.sha256(query.encode()).hexdigest() + + +def sanitize_path(path: str) -> str: + """Sanitize file path""" + # Remove any .. 
or other path traversal attempts + path = re.sub(r'\.\.+', '', path) + return path + + +def validate_timeout(timeout: int, max_timeout: int = 300): + """Validate timeout value""" + if timeout < 1: + raise ValidationError("Timeout must be at least 1 second") + + if timeout > max_timeout: + raise ValidationError(f"Timeout cannot exceed {max_timeout} seconds") From 85ef617520664e0bde45d419b52854346290fe18 Mon Sep 17 00:00:00 2001 From: Ahmed Lekssays Date: Tue, 7 Oct 2025 19:55:49 +0300 Subject: [PATCH 3/3] remove artifact --- playground/codebases/sample/sample_asan | Bin 29656 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100755 playground/codebases/sample/sample_asan diff --git a/playground/codebases/sample/sample_asan b/playground/codebases/sample/sample_asan deleted file mode 100755 index 28b98d7a2ac7bc730415825dc5ad1e9ff63218fa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 29656 zcmeHwdw3khm2Y*=v^26FGnVAH!6U~83~2PQY-D5El4Y4uY*~tJ1qgxhXr?6%mS&WB zfSt?3iEM%}N^o|8Bx|!~H(8Pm##oAO(_Z%MctkVeaqL zqt$9_L~ibW_n%eS)2Gg1m!%l56hrZJUVY$GFVq*PEEWx+K!$_zjw zt7pZyUc^?iJm79l6XlHppj4!j`c&wVbQd7WEu_peaH$|O1(k&aNiJ7nC@S9BUB#tZgdv zOMfb;{7Gu)Q!njtLWkMRDc39IoN$Y*UO}pZ(ymw1(Z8m?4k`D+8llGt4+=6G-;g0>j&Mf3ap`K3q7IB}ZGR?y_so>KaW9Q1e&I@RyT z$C)lg!F2jR2FB^=zso`2nS854uaoF1pe(DdhSEnbaq~wgT5n&o}nD{ z`8ntx&7uE`Ip`F!r?c~_9P~$X(4Wph@6ADf5cK7+$(cq$n9G*1Qyrp&#xhT#P)~m% z9!jOd$#f{hLan>oLQykm_QXQ`^5lWirOfs%`rA<`5rPXd{%`}V_ZA$euER|07nQ=CdjK$O4 zN-h&uUJCW}CAz|WDOGZlnM{$L{jpSkI34K?MS4wGdXvpH!tpl2$i8r>I~EW3#XgKa z2qExAWOwz4V{x(|oyhc|M3@%o-5cr-$NCr;saGf_t|cTQa#w9{-P|0i4XmzXDm$60 z4XmM2p-ubGH4W8`=%?|I7)}}@;uyxDitk@Nav#E=n@vbsc`Q~;QS;9d&n%7=Pz24F zLG+Zo53Bp%Ccn@-=Aa*!5%jc!ULpC)JC^2LLRr#X&t1}h>bo^5dojG(HGe0FWBe{ zZS>jftOsU2@PF@tv)&8e4i3KV2_80{-^5t(&`4U(z7QPzrso;1EW1Vrc_O>g1>jxA zh`*OgCdPl6&1Uc6I0e*+@fR(e0_nu~sD)Diof!WY3#S{x#Q1|2PJwh{{D_590G$~B zh=o(&oEZO*g;T(s7!O-G1_gOgISSH4sESzpA6XP`&P62dcyu!jMa88WRwQvfU z6XTkNdpUmQ9o1e6kZ8Y!Q(#2a^SZ*@QefRci^T2zutj&I`B3J-r~R;9QYasUg^MBI`9h|_&hsa^Sjoe z7jFm-y&N3;_34h?Ej1%G&j*Ja-o!A-&MO1w8{Gl#vV-UZ4DjKGaZrN8#>)iOolDQd z(EkzM($LT^$e!{pJ3xHTNC^I~bN)47Ca~(A;Lz#d=LopLRWD zt@=rR{ALtVrW10QtiIsTt>=P6nbX0;EvJJ+C&m_FWCe#tfr9*WaP+sX;76Wj2eP+& zzgBwi-_suASxr2g0dpOEhpEAt{Kug>`&tht4UT{_hxi*Xk~5uX;48nUBrJ8aTk2(D{6mwXzo7Sh7q zTQM_G*&IF<#y_TP$kk^zT574KGn=r{;%)QOmeTm*+6+e2XZ`U9;)zZh(|zaEvROP^{47+XSm z;7(U?=$YVf^PbkBZv{`dzH--NO^CT)!csms)a*g_7+2KnYaRMo>*(vQrV|HOP{cZe zM0q#z?z3mlMuUglD=&d!bo|Mhk>KG&+=AFh<7O{thxS2D>u@I(Q1(go?ay3EbLeT& zsd3P`t+{%0=^ts32p$zF1UA#q`pr3;l!zDOwrL*nv<|n7Q1Ia5uAm`eIy*Sxy?1nj%lp7c#d8NoD8B!A%mu-w!!09p z8+i8yth84X^t7(~MS3~9sbuY`mf+)W%G5d}`XqR`x&B$kUMp&qorIRsJUtl=UM4yx zZH#^8Ex2Ux^#j*6eX!~JrW>w5Kho0;%RX)wVmi<*3;~OWy~BvW&arp38hsym{Z2jz zzHtp7H;;7?7(O;Y-9PlwU_;qj6ss|KKiQIN46esxrW}L!a$PfyLFO2P&tVJ>awAxJ z4sv7%%M>;}378#Uft7FY1b4si(IIZ`OhakOANWw>e1{Pw4vlyZj$%QLUbJX~BN~nA zYGNG{6JU_fM3(W1nz`S`lQy}T>U`);Tb&epL{%6%qvKdQjEZ4aaPWDtbl7KSCvFi0 zzE3u7x0i8MypY2G&~ww=_Yi^*`%VnLn%`LeV+ueMDgaGD)3X|D9d4t^Oj@3xb$oD$ z*YgI|GsHcsCdIVNobSGY0dW}d08{4t_SkFxf%n*G#TnJcEzs}%nvU7D)N0^~v5lbG z9Q+!Vdb05+#+IY(aeDIM;b+pAXZ3S~=d=ve+{*{43P5~}zl!^A)6lb9h9+9qz4TX1 zx##Rtt`TiTGe7c8ni_K6HH9W;-sIeQlpHyHtPxJ-5&M-|5HMQ~0E~U%EL>~Nmciqo zwGMrAGH6%eaYjy`w)d}Jm*Y+>fu>-7TF?XF^8OiymffX zisMg;JNk*SW$-1x1(QF=%V`)78^6S36}r5h>O3p}2HcfoEkfeqVJhHiv%C!zX zMjg^R^bP6(#O-|)x97KtenO)dva)l|i{NL@$l*kz<-9QdBt68#&ey7_S5Azb1^47o z8><647J}J|?-|MWQ_jbqbNPD)>&*gLGn-~TFzbO?56pUC)&sL1nDxM{2WCAm>w#Gh z%z9wf1G66Z|KS1JTQ^gx@vpA+*ZJ4_lV<<=wQCs1>;3EeW;{BOPOe{9Eu?FSuVyV6 