Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 30 additions & 4 deletions agent_core/core/credentials/oauth_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"""

import asyncio
import html
import ipaddress
import logging
import os
Expand Down Expand Up @@ -120,10 +121,30 @@ class _OAuthCallbackHandler(BaseHTTPRequestHandler):
def do_GET(self):
"""Handle GET request from OAuth callback."""
params = parse_qs(urlparse(self.path).query)
result_holder["code"] = params.get("code", [None])[0]
result_holder["state"] = params.get("state", [None])[0]
returned_state = params.get("state", [None])[0]
result_holder["error"] = params.get("error", [None])[0]

# Validate OAuth state parameter to prevent CSRF
expected_state = result_holder.get("expected_state")
import hmac
if not expected_state:
# No state in auth URL -- log warning but allow (some providers don't support state)
logger.warning("[OAUTH] No state parameter in auth URL — CSRF protection disabled for this flow")
result_holder["code"] = params.get("code", [None])[0]
elif not returned_state:
# State was expected but callback didn't include it -- reject
result_holder["error"] = "OAuth callback missing state parameter — possible CSRF attack"
result_holder["code"] = None
logger.warning("[OAUTH] Expected state but callback had none")
elif not hmac.compare_digest(str(returned_state), str(expected_state)):
result_holder["error"] = "OAuth state mismatch — possible CSRF attack"
result_holder["code"] = None
logger.warning("[OAUTH] State mismatch: expected %s, got %s", expected_state, returned_state)
else:
result_holder["code"] = params.get("code", [None])[0]

result_holder["state"] = returned_state

self.send_response(200)
self.send_header("Content-Type", "text/html")
self.end_headers()
Expand All @@ -132,8 +153,9 @@ def do_GET(self):
b"<h2>Authorization successful!</h2><p>You can close this tab.</p>"
)
else:
safe_error = html.escape(str(result_holder.get('error') or 'Unknown error'))
self.wfile.write(
f"<h2>Failed</h2><p>{result_holder['error']}</p>".encode()
f"<h2>Failed</h2><p>{safe_error}</p>".encode()
)

def log_message(self, format, *args):
Expand Down Expand Up @@ -203,8 +225,12 @@ def run_oauth_flow(
if cancel_event and cancel_event.is_set():
return None, "OAuth cancelled"

# Extract the state parameter from the auth URL for CSRF validation
auth_params = parse_qs(urlparse(auth_url).query)
expected_state = auth_params.get("state", [None])[0]

# Use instance-level result holder instead of class-level state
result_holder: Dict[str, Any] = {"code": None, "state": None, "error": None}
result_holder: Dict[str, Any] = {"code": None, "state": None, "error": None, "expected_state": expected_state}
handler_class = _make_callback_handler(result_holder)

try:
Expand Down
43 changes: 42 additions & 1 deletion app/data/action/http_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,20 +169,61 @@ def send_http_requests(input_data: dict) -> dict:
return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'Unsupported method.'}
if not url or not (url.startswith('http://') or url.startswith('https://')):
return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'Invalid or missing URL.'}

# SSRF protection: block requests to private/internal networks and cloud metadata
from urllib.parse import urlparse as _urlparse
import ipaddress as _ipaddress
import socket as _socket

def _is_url_ssrf_safe(check_url: str) -> str | None:
"""Return an error message if the URL targets a blocked host, or None if safe."""
_parsed = _urlparse(check_url)
_hostname = _parsed.hostname or ''
_BLOCKED_HOSTS = {'169.254.169.254', 'metadata.google.internal', 'metadata.internal'}
if _hostname in _BLOCKED_HOSTS:
return 'Blocked: requests to cloud metadata endpoints are not allowed.'
try:
_resolved = _socket.getaddrinfo(_hostname, None)
for _family, _type, _proto, _canonname, _sockaddr in _resolved:
_ip = _ipaddress.ip_address(_sockaddr[0])
if _ip.is_private or _ip.is_loopback or _ip.is_link_local:
return f'Blocked: requests to private/internal addresses ({_hostname}) are not allowed.'
except (_socket.gaierror, ValueError):
return f'Blocked: could not resolve hostname ({_hostname}).'
return None

ssrf_error = _is_url_ssrf_safe(url)
if ssrf_error:
return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message': ssrf_error}
if json_body is not None and data_body is not None:
return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'Provide either json or data, not both.'}
if not isinstance(headers, dict) or not isinstance(params, dict):
return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'headers and params must be objects.'}
headers = {str(k): str(v) for k, v in headers.items()}
params = {str(k): str(v) for k, v in params.items()}
kwargs = {'headers': headers, 'params': params, 'timeout': timeout, 'allow_redirects': allow_redirects, 'verify': verify_tls}
kwargs = {'headers': headers, 'params': params, 'timeout': timeout, 'allow_redirects': False, 'verify': verify_tls}
if json_body is not None:
kwargs['json'] = json_body
elif data_body is not None:
kwargs['data'] = data_body
try:
t0 = time.time()
resp = requests.request(method, url, **kwargs)
# Manually follow redirects with SSRF validation on each hop
_max_redirects = 10
while allow_redirects and resp.is_redirect and _max_redirects > 0:
_max_redirects -= 1
redirect_url = resp.headers.get('Location', '')
if not redirect_url:
break
# Resolve relative redirects
if not redirect_url.startswith(('http://', 'https://')):
from urllib.parse import urljoin
redirect_url = urljoin(resp.url, redirect_url)
redirect_error = _is_url_ssrf_safe(redirect_url)
if redirect_error:
return {'status':'error','status_code':resp.status_code,'response_headers':{},'body':'','final_url':resp.url,'elapsed_ms':int((time.time()-t0)*1000),'message':f'Redirect blocked: {redirect_error}'}
resp = requests.request('GET', redirect_url, **{**kwargs, 'allow_redirects': False})
elapsed_ms = int((time.time() - t0) * 1000)
resp_headers = {k: v for k, v in resp.headers.items()}
parsed_json = None
Expand Down
23 changes: 21 additions & 2 deletions app/data/action/read_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,26 @@ def read_file(input_data: dict) -> dict:
'message': 'file_path is required.'
}

if not os.path.isfile(file_path):
# Resolve symlinks and normalize path to prevent traversal attacks
resolved_path = os.path.realpath(file_path)

# Block access to sensitive system files and directories
_BLOCKED_PREFIXES = ('/etc/shadow', '/etc/gshadow')
_BLOCKED_DIRS = ('.credentials', '.ssh', '.gnupg', '.aws')
path_parts = resolved_path.replace('\\', '/').split('/')
if any(resolved_path.startswith(p) for p in _BLOCKED_PREFIXES) or \
any(part in _BLOCKED_DIRS for part in path_parts):
return {
'status': 'error',
'content': '',
'total_lines': 0,
'lines_returned': 0,
'offset': 0,
'has_more': False,
'message': f'Access denied: {file_path} is in a restricted location.'
}

if not os.path.isfile(resolved_path):
return {
'status': 'error',
'content': '',
Expand All @@ -142,7 +161,7 @@ def read_file(input_data: dict) -> dict:
}

try:
with open(file_path, 'r', encoding=encoding, errors='replace') as f:
with open(resolved_path, 'r', encoding=encoding, errors='replace') as f:
all_lines = f.readlines()

total_lines = len(all_lines)
Expand Down
18 changes: 14 additions & 4 deletions app/data/action/write_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,24 @@ def write_file(input_data: dict) -> dict:
if write_mode not in ('overwrite', 'append'):
return {'status': 'error', 'file_path': '', 'bytes_written': 0, 'message': "mode must be 'overwrite' or 'append'."}

# Resolve path to prevent traversal attacks (resolve parent since file may not exist yet)
parent_dir = os.path.dirname(os.path.abspath(file_path))
resolved_parent = os.path.realpath(parent_dir) if os.path.exists(parent_dir) else os.path.abspath(parent_dir)
resolved_path = os.path.join(resolved_parent, os.path.basename(file_path))

# Block writes to sensitive directories
_BLOCKED_DIRS = ('.credentials', '.ssh', '.gnupg', '.aws', '.env')
path_parts = resolved_path.replace('\\', '/').split('/')
if any(part in _BLOCKED_DIRS for part in path_parts):
return {'status': 'error', 'file_path': '', 'bytes_written': 0, 'message': f'Access denied: cannot write to restricted location.'}

try:
# Create parent directories if needed
parent_dir = os.path.dirname(file_path)
if parent_dir:
os.makedirs(parent_dir, exist_ok=True)
if resolved_parent:
os.makedirs(resolved_parent, exist_ok=True)

file_mode = 'w' if write_mode == 'overwrite' else 'a'
with open(file_path, file_mode, encoding=encoding) as f:
with open(resolved_path, file_mode, encoding=encoding) as f:
bytes_written = f.write(content)

return {
Expand Down
11 changes: 10 additions & 1 deletion app/external_comms/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

import json
import logging
import os
import stat
from dataclasses import asdict, fields
from pathlib import Path
from typing import Optional, Type, TypeVar
Expand All @@ -32,6 +34,11 @@ def _get_credentials_dir() -> Path:
from app.config import PROJECT_ROOT
_credentials_dir = PROJECT_ROOT / ".credentials"
_credentials_dir.mkdir(parents=True, exist_ok=True)
# Restrict directory permissions to owner only (rwx------)
try:
os.chmod(_credentials_dir, stat.S_IRWXU)
except OSError:
pass # Best-effort on platforms that don't support chmod (e.g. Windows)
return _credentials_dir


Expand Down Expand Up @@ -76,7 +83,9 @@ def save_credential(filename: str, credential) -> None:
"""
path = _get_credentials_dir() / filename
try:
with open(path, "w", encoding="utf-8") as f:
# Create file with restricted permissions atomically (rw-------)
fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w", encoding="utf-8") as f:
json.dump(asdict(credential), f, indent=2, default=str)
logger.info(f"Saved credential: {filename}")
except Exception as e:
Expand Down
9 changes: 6 additions & 3 deletions app/security/prompt_sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,17 @@ def sanitize_user_message(text: str, max_length: int = 5000) -> str:
suspicious_patterns.append(pattern)

if suspicious_patterns:
# Log these for monitoring (optional)
# Log and strip detected injection patterns
import logging
logger = logging.getLogger(__name__)
logger.warning(
f"[SECURITY] Potential prompt injection detected. "
f"[SECURITY] Potential prompt injection detected and stripped. "
f"Text: {text[:100]}... Patterns: {suspicious_patterns[:2]}"
)

# Strip matched injection patterns from the text
for pattern in suspicious_patterns:
text = re.sub(pattern, '[FILTERED]', text)

return text

@staticmethod
Expand Down
124 changes: 124 additions & 0 deletions context/claude.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# CraftBot Security Review - Session Context

## Current State

- **Repo:** CraftOS-dev/CraftBot (cloned to ~/Claude/projects/CraftBot)
- **Fork:** eesb99/CraftBot
- **Branch:** security/fix-critical-vulnerabilities
- **PR:** CraftOS-dev/CraftBot#198 (open -- follow-up to merged #195)
- **Date:** 2026-04-16 (Session 2)

## Session 1 Summary (2026-04-15)

### Goals
- Clone CraftBot repo and perform a full security review
- Fix identified vulnerabilities and submit PR to upstream

### Security Review Findings (12 total)

| # | Severity | Finding | CWE | Fixed? |
|---|----------|---------|-----|--------|
| 1 | CRITICAL | Hardcoded OAuth credentials (base64-encoded in source) | CWE-798 | No (arch) |
| 2 | CRITICAL | Unsandboxed `exec()` of LLM-generated Python | CWE-94 | No (arch) |
| 3 | CRITICAL | Unrestricted `shell=True` command execution | CWE-78 | No (arch) |
| 4 | HIGH | Plaintext credential storage | CWE-312 | Yes (permissions) |
| 5 | HIGH | No path traversal protection on file ops | CWE-22 | Yes |
| 6 | HIGH | XSS in OAuth callback | CWE-79 | Yes |
| 7 | HIGH | Prompt sanitizer detects but doesn't block | CWE-20 | Yes |
| 8 | MEDIUM | Auto-install arbitrary pip packages | CWE-829 | No (design) |
| 9 | MEDIUM | Docker socket exposure | CWE-250 | No (deploy) |
| 10 | MEDIUM | No SSRF protection on HTTP requests | CWE-918 | Yes |
| 11 | MEDIUM | OAuth state not validated (CSRF) | CWE-352 | Yes |
| 12 | LOW | Broad `COPY .` in Dockerfile | - | No (low risk) |

### Files Modified (6)
1. `agent_core/core/credentials/oauth_server.py` - XSS fix + CSRF state validation
2. `app/data/action/http_request.py` - SSRF protection (private IPs, cloud metadata)
3. `app/data/action/read_file.py` - Path traversal protection
4. `app/data/action/write_file.py` - Path traversal protection
5. `app/external_comms/credentials.py` - File permission hardening (0600)
6. `app/security/prompt_sanitizer.py` - Enforce pattern stripping

### Commits
- `ae5025e` - security: fix 6 vulnerabilities (XSS, CSRF, SSRF, path traversal, credential storage, prompt injection)

### Key Architecture Notes
- Python AI agent with LLM integrations (OpenAI, Gemini, Anthropic)
- OAuth for 6+ platforms (Google, Slack, Notion, LinkedIn, Discord, Telegram)
- Actions system in `app/data/action/` -- each action is a decorated function
- Credentials stored as JSON in `.credentials/` directory
- GUI mode uses Docker containers with desktop environments
- 150+ skills in `skills/` directory
- TUI built with Textual, browser UI with separate frontend

### Next Steps
- [x] Monitor PR CraftOS-dev/CraftBot#195 for maintainer feedback -- MERGED
- [ ] Consider filing separate issues for the 3 CRITICAL unfixed items
- [ ] If PR is merged, the embedded credentials issue should be escalated as a security advisory

## Session 2 Summary (2026-04-16)

### Goals
- Rebase security branch onto latest origin/main
- Code review (unified-review --fix) of session 1 security fixes
- Fix bugs and architectural bypass vectors found during review
- Open follow-up PR (#198) to upstream

### Unified Review Findings (10 total, >= 80 confidence)

Dispatched parallel reviewers: python-code-reviewer + security-focused code-reviewer.

| Confidence | Finding | Status |
|-----------|---------|--------|
| 95 | `socket.gaierror` vs `_socket.gaierror` NameError silently disabled SSRF | Fixed (auto-fix) |
| 93 | HTTP redirect bypass defeats SSRF check entirely | Fixed (PR A) |
| 92 | CSRF state validation silently skipped when state absent | Fixed (PR A) |
| 90 | DNS rebinding TOCTOU in resolve-then-request pattern | Deferred |
| 88 | Non-timing-safe OAuth state comparison | Fixed (auto-fix) |
| 85 | Path blocklist too narrow for read_file/write_file | Deferred |
| 85 | Credential file written world-readable before chmod | Fixed (auto-fix) |
| 82 | Outer `except Exception: pass` makes SSRF fail-open | Fixed (PR A) |
| 82 | Sequential pattern stripping creates new injection vectors | Deferred |
| 80 | Overly broad regex causes false positives on "run", "import" | Deferred |

### Implementation Details

**Commit 1 (c8389b2) -- Bug fixes (auto-fix phase):**
- `_socket.gaierror` NameError fix
- `hmac.compare_digest()` for timing-safe OAuth state comparison
- `os.open()` with 0o600 for atomic credential file permissions

**Commit 2 (91ac962) -- Architectural fixes (PR A):**
- Extracted `_is_url_ssrf_safe()` helper for reuse across redirect hops
- Disabled `allow_redirects`, manually follow redirects with SSRF validation per hop
- Removed fail-open `except Exception: pass` -- DNS failures now block requests
- Explicit 3-way CSRF state handling (no state/missing from callback/mismatch)

### Files Modified (3)
1. `app/data/action/http_request.py` - SSRF redirect validation, fail-closed, helper extraction
2. `agent_core/core/credentials/oauth_server.py` - CSRF enforcement, timing-safe comparison
3. `app/external_comms/credentials.py` - Atomic file permissions

### Commits
- `c8389b2` - security: fix SSRF NameError, timing-safe OAuth state, atomic credential perms
- `91ac962` - security: fix SSRF redirect bypass, fail-closed validation, CSRF enforcement

### Code Design Assessment
- Architecture is solid (protocol-driven, registry DI, plugin-based)
- Security was bolted on after the fact, not designed in
- Zero test infrastructure -- biggest gap for security confidence
- Prompt sanitizer is fundamentally unsound (regex vs semantic attacks)
- Blocklist approach is inherently weaker than allowlist for file/URL validation

### Advisor Recommendations (prioritized)
1. **PR A** (done): SSRF redirect bypass + fail-closed + CSRF enforcement
2. **PR B** (next): pytest infrastructure + action file unit tests
3. **PR C** (future): Extract shared validation utilities (`validate_url()`, `validate_file_path()`)
4. **Skip**: Prompt sanitizer iteration (flawed approach, suggest architecture change instead)

### Next Steps
- [ ] Monitor PR CraftOS-dev/CraftBot#198 for maintainer feedback
- [ ] PR B: Add pytest framework with tests for http_request, read_file, write_file
- [ ] PR C: Extract centralized validation utilities
- [ ] File GitHub issue recommending prompt injection architecture change
- [ ] Consider filing security advisory for embedded OAuth credentials (CWE-798)