51 changes: 44 additions & 7 deletions .codegraph.toml.example
@@ -42,29 +42,66 @@ batch_size = 64
# Set to false for maximum speed if using an external agent
enabled = false

# LLM provider: "ollama" or "lmstudio"
# "lmstudio" recommended for MLX + Flash Attention 2 (macOS)
# LLM provider: "ollama", "lmstudio", "anthropic", "openai", or "openai-compatible"
# - "lmstudio": Local LLMs via LM Studio (recommended for MLX + Flash Attention 2 on macOS)
# - "ollama": Local LLMs via Ollama
# - "anthropic": Anthropic Claude API (requires API key)
# - "openai": OpenAI GPT API (requires API key)
# - "openai-compatible": Any OpenAI-compatible API endpoint
provider = "lmstudio"

# LLM model identifier
# For LM Studio: lmstudio-community/DeepSeek-Coder-V2-Lite-Instruct-GGUF
# For Ollama: Model name (e.g., "qwen2.5-coder:14b", "codellama:13b")
# For Anthropic: Model name (e.g., "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022")
# For OpenAI: Model name (e.g., "gpt-4o", "gpt-4o-mini", "gpt-4-turbo")
# For OpenAI-compatible: Custom model name
# Recommended: DeepSeek Coder v2 Lite Instruct Q4_K_M (local), or Claude 3.5 Sonnet (cloud)
model = "lmstudio-community/DeepSeek-Coder-V2-Lite-Instruct-GGUF"

# LM Studio URL (only used if provider is "lmstudio")
lmstudio_url = "http://localhost:1234"

# Ollama URL (only used if provider is "ollama")
ollama_url = "http://localhost:11434"

# OpenAI-compatible base URL (only used if provider is "openai-compatible")
# Example: "http://localhost:1234/v1" for LM Studio OpenAI endpoint
# openai_compatible_url = "http://localhost:1234/v1"
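# A sketch of another OpenAI-compatible target (an assumption, not a shipped
# default): a local vLLM server, which serves its OpenAI-compatible API on
# port 8000 by default:
# openai_compatible_url = "http://localhost:8000/v1"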

# Anthropic API key (only used if provider is "anthropic")
# Can also be set via ANTHROPIC_API_KEY environment variable
# anthropic_api_key = "sk-ant-..."

# OpenAI API key (used if provider is "openai"; some "openai-compatible" endpoints also require it)
# Can also be set via OPENAI_API_KEY environment variable
# openai_api_key = "sk-..."
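# Keys can come from the environment instead of this file, e.g. in your shell:
#   export ANTHROPIC_API_KEY="sk-ant-..."
#   export OPENAI_API_KEY="sk-..."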

# Context window size (tokens)
# DeepSeek Coder v2 Lite: 32768 tokens
# Claude 3.5 Sonnet: 200000 tokens
# GPT-4o: 128000 tokens
context_window = 32000

# Temperature for generation (0.0 = deterministic, 2.0 = very creative)
temperature = 0.1

# Maximum tokens to generate in responses (legacy parameter; prefer max_output_tokens for the Responses API)
max_tokens = 4096

# Maximum output tokens for Responses API and reasoning models
# If not set, falls back to max_tokens
# max_output_tokens = 4096

# Reasoning effort for reasoning models (o1, o3, o4-mini, GPT-5)
# Options: "minimal", "low", "medium", "high"
# Higher effort = more reasoning tokens = better quality but slower and more expensive
# Only applies to reasoning models, ignored by standard models
# reasoning_effort = "medium"
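# A sketch combining the output/reasoning knobs for a reasoning model
# (assumes provider = "openai" and an o-series model above; the token
# budget is illustrative):
# model = "o4-mini"
# max_output_tokens = 8192
# reasoning_effort = "high"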

# Request timeout in seconds
timeout_secs = 120

# Insights mode: "context-only", "balanced", or "deep"
# - context-only: Return context only (fastest, for agents)
# - balanced: Process top 10 files with LLM (good speed/quality)
1 change: 1 addition & 0 deletions Cargo.toml
@@ -13,6 +13,7 @@ members = [
"crates/codegraph-lb",
"crates/codegraph-ai",
"crates/codegraph-api",
"crates/codegraph-cli",
"crates/core-rag-mcp-server",
"scripts",
"tests/integration"