From 0bda0adb25565b999194aa3ea5f9f9e553337064 Mon Sep 17 00:00:00 2001 From: anhnh2002 Date: Mon, 12 Jan 2026 14:32:12 +0700 Subject: [PATCH] make max tokens configurable --- README.md | 30 ++++++++ codewiki/cli/adapters/doc_generator.py | 3 + codewiki/cli/commands/config.py | 69 ++++++++++++++++++- codewiki/cli/commands/generate.py | 44 +++++++++++- codewiki/cli/config_manager.py | 14 +++- codewiki/cli/models/config.py | 15 ++++ .../generate_sub_module_documentations.py | 3 +- codewiki/src/be/cluster_modules.py | 4 +- codewiki/src/be/llm_services.py | 6 +- codewiki/src/config.py | 22 +++++- 10 files changed, 196 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index ad5e9d1f..94684cdb 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,9 @@ codewiki config set \ --cluster-model \ --fallback-model +# Configure max token settings +codewiki config set --max-tokens 32768 --max-token-per-module 36369 --max-token-per-leaf-module 16000 + # Show current configuration codewiki config show @@ -196,6 +199,33 @@ codewiki config agent --clear | `--doc-type` | Documentation style | Standalone option | `api`, `architecture`, `user-guide`, `developer` | | `--instructions` | Custom agent instructions | Standalone option | Free-form text | +### Token Settings + +CodeWiki allows you to configure maximum token limits for LLM calls. 
This is useful for: +- Adapting to different model context windows +- Controlling costs by limiting response sizes +- Optimizing for faster response times + +```bash +# Set max tokens for LLM responses (default: 32768) +codewiki config set --max-tokens 16384 + +# Set the input-token threshold above which a module is clustered into sub-modules (default: 36369) +codewiki config set --max-token-per-module 40000 + +# Set the input-token threshold for leaf modules (default: 16000) +codewiki config set --max-token-per-leaf-module 20000 + +# Override at runtime for a single generation +codewiki generate --max-tokens 16384 --max-token-per-module 40000 +``` + +| Option | Description | Default | +|--------|-------------|---------| +| `--max-tokens` | Maximum output tokens for LLM response | 32768 | +| `--max-token-per-module` | Input-token threshold; modules above it are clustered into sub-modules | 36369 | +| `--max-token-per-leaf-module` | Input-token threshold for leaf modules | 16000 | + ### Configuration Storage - **API keys**: Securely stored in system keychain (macOS Keychain, Windows Credential Manager, Linux Secret Service) diff --git a/codewiki/cli/adapters/doc_generator.py b/codewiki/cli/adapters/doc_generator.py index 1e99fe8f..7e065399 100644 --- a/codewiki/cli/adapters/doc_generator.py +++ b/codewiki/cli/adapters/doc_generator.py @@ -137,6 +137,9 @@ def generate(self) -> DocumentationJob: main_model=self.config.get('main_model'), cluster_model=self.config.get('cluster_model'), fallback_model=self.config.get('fallback_model'), + max_tokens=self.config.get('max_tokens', 32768), + max_token_per_module=self.config.get('max_token_per_module', 36369), + max_token_per_leaf_module=self.config.get('max_token_per_leaf_module', 16000), agent_instructions=self.config.get('agent_instructions') ) diff --git a/codewiki/cli/commands/config.py b/codewiki/cli/commands/config.py index b05e32dd..3a3b175d 100644 --- a/codewiki/cli/commands/config.py +++ b/codewiki/cli/commands/config.py @@ -63,12 +63,30 @@ def config_group(): type=str, help="Fallback model for 
documentation generation" ) +@click.option( + "--max-tokens", + type=int, + help="Maximum tokens for LLM response (default: 32768)" +) +@click.option( + "--max-token-per-module", + type=int, + help="Maximum tokens per module for clustering (default: 36369)" +) +@click.option( + "--max-token-per-leaf-module", + type=int, + help="Maximum tokens per leaf module (default: 16000)" +) def config_set( api_key: Optional[str], base_url: Optional[str], main_model: Optional[str], cluster_model: Optional[str], - fallback_model: Optional[str] + fallback_model: Optional[str], + max_tokens: Optional[int], + max_token_per_module: Optional[int], + max_token_per_leaf_module: Optional[int] ): """ Set configuration values for CodeWiki. @@ -88,10 +106,18 @@ def config_set( \b # Update only API key $ codewiki config set --api-key sk-new-key + + \b + # Set max tokens for LLM response + $ codewiki config set --max-tokens 16384 + + \b + # Set all max token settings + $ codewiki config set --max-tokens 32768 --max-token-per-module 40000 --max-token-per-leaf-module 20000 """ try: # Check if at least one option is provided - if not any([api_key, base_url, main_model, cluster_model, fallback_model]): + if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module]): click.echo("No options provided. 
Use --help for usage information.") sys.exit(EXIT_CONFIG_ERROR) @@ -113,6 +139,21 @@ def config_set( if fallback_model: validated_data['fallback_model'] = validate_model_name(fallback_model) + if max_tokens is not None: + if max_tokens < 1: + raise ConfigurationError("max_tokens must be a positive integer") + validated_data['max_tokens'] = max_tokens + + if max_token_per_module is not None: + if max_token_per_module < 1: + raise ConfigurationError("max_token_per_module must be a positive integer") + validated_data['max_token_per_module'] = max_token_per_module + + if max_token_per_leaf_module is not None: + if max_token_per_leaf_module < 1: + raise ConfigurationError("max_token_per_leaf_module must be a positive integer") + validated_data['max_token_per_leaf_module'] = max_token_per_leaf_module + # Create config manager and save manager = ConfigManager() manager.load() # Load existing config if present @@ -122,7 +163,10 @@ def config_set( base_url=validated_data.get('base_url'), main_model=validated_data.get('main_model'), cluster_model=validated_data.get('cluster_model'), - fallback_model=validated_data.get('fallback_model') + fallback_model=validated_data.get('fallback_model'), + max_tokens=validated_data.get('max_tokens'), + max_token_per_module=validated_data.get('max_token_per_module'), + max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module') ) # Display success messages @@ -159,6 +203,15 @@ def config_set( if fallback_model: click.secho(f"✓ Fallback model: {fallback_model}", fg="green") + if max_tokens: + click.secho(f"✓ Max tokens: {max_tokens}", fg="green") + + if max_token_per_module: + click.secho(f"✓ Max token per module: {max_token_per_module}", fg="green") + + if max_token_per_leaf_module: + click.secho(f"✓ Max token per leaf module: {max_token_per_leaf_module}", fg="green") + click.echo("\n" + click.style("Configuration updated successfully.", fg="green", bold=True)) except ConfigurationError as e: @@ -215,6 +268,9 @@ def 
config_show(output_json: bool): "cluster_model": config.cluster_model if config else "", "fallback_model": config.fallback_model if config else "glm-4p5", "default_output": config.default_output if config else "docs", + "max_tokens": config.max_tokens if config else 32768, + "max_token_per_module": config.max_token_per_module if config else 36369, + "max_token_per_leaf_module": config.max_token_per_leaf_module if config else 16000, "agent_instructions": config.agent_instructions.to_dict() if config and config.agent_instructions else {}, "config_file": str(manager.config_file_path) } @@ -248,6 +304,13 @@ def config_show(output_json: bool): if config: click.echo(f" Default Output: {config.default_output}") + click.echo() + click.secho("Token Settings", fg="cyan", bold=True) + if config: + click.echo(f" Max Tokens: {config.max_tokens}") + click.echo(f" Max Token/Module: {config.max_token_per_module}") + click.echo(f" Max Token/Leaf Module: {config.max_token_per_leaf_module}") + click.echo() click.secho("Agent Instructions", fg="cyan", bold=True) if config and config.agent_instructions and not config.agent_instructions.is_empty(): diff --git a/codewiki/cli/commands/generate.py b/codewiki/cli/commands/generate.py index f159943f..8e285a48 100644 --- a/codewiki/cli/commands/generate.py +++ b/codewiki/cli/commands/generate.py @@ -102,6 +102,24 @@ def parse_patterns(patterns_str: str) -> List[str]: is_flag=True, help="Show detailed progress and debug information", ) +@click.option( + "--max-tokens", + type=int, + default=None, + help="Maximum tokens for LLM response (overrides config)", +) +@click.option( + "--max-token-per-module", + type=int, + default=None, + help="Maximum tokens per module for clustering (overrides config)", +) +@click.option( + "--max-token-per-leaf-module", + type=int, + default=None, + help="Maximum tokens per leaf module (overrides config)", +) @click.pass_context def generate_command( ctx, @@ -114,7 +132,10 @@ def generate_command( focus: 
Optional[str], doc_type: Optional[str], instructions: Optional[str], - verbose: bool + verbose: bool, + max_tokens: Optional[int], + max_token_per_module: Optional[int], + max_token_per_leaf_module: Optional[int] ): """ Generate comprehensive documentation for a code repository. @@ -147,6 +168,14 @@ def generate_command( \b # Custom instructions $ codewiki generate --instructions "Focus on public APIs and include usage examples" + + \b + # Override max tokens for this generation + $ codewiki generate --max-tokens 16384 + + \b + # Set all max token limits + $ codewiki generate --max-tokens 32768 --max-token-per-module 40000 --max-token-per-leaf-module 20000 """ logger = create_logger(verbose=verbose) start_time = time.time() @@ -276,6 +305,15 @@ def generate_command( if instructions: logger.debug(f"Custom instructions: {instructions}") + # Log max token settings if verbose + if verbose: + effective_max_tokens = max_tokens if max_tokens is not None else config.max_tokens + effective_max_token_per_module = max_token_per_module if max_token_per_module is not None else config.max_token_per_module + effective_max_token_per_leaf = max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module + logger.debug(f"Max tokens: {effective_max_tokens}") + logger.debug(f"Max token/module: {effective_max_token_per_module}") + logger.debug(f"Max token/leaf module: {effective_max_token_per_leaf}") + # Get agent instructions (merge runtime with persistent) agent_instructions_dict = None if runtime_instructions and not runtime_instructions.is_empty(): @@ -302,6 +340,10 @@ def generate_command( 'base_url': config.base_url, 'api_key': api_key, 'agent_instructions': agent_instructions_dict, + # Max token settings (runtime overrides take precedence) + 'max_tokens': max_tokens if max_tokens is not None else config.max_tokens, + 'max_token_per_module': max_token_per_module if max_token_per_module is not None else config.max_token_per_module, + 
'max_token_per_leaf_module': max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module, }, verbose=verbose, generate_html=github_pages diff --git a/codewiki/cli/config_manager.py b/codewiki/cli/config_manager.py index a121e92c..652d4ea1 100644 --- a/codewiki/cli/config_manager.py +++ b/codewiki/cli/config_manager.py @@ -88,7 +88,10 @@ def save( main_model: Optional[str] = None, cluster_model: Optional[str] = None, fallback_model: Optional[str] = None, - default_output: Optional[str] = None + default_output: Optional[str] = None, + max_tokens: Optional[int] = None, + max_token_per_module: Optional[int] = None, + max_token_per_leaf_module: Optional[int] = None ): """ Save configuration to file and keyring. @@ -100,6 +103,9 @@ def save( cluster_model: Clustering model fallback_model: Fallback model default_output: Default output directory + max_tokens: Maximum tokens for LLM response + max_token_per_module: Maximum tokens per module for clustering + max_token_per_leaf_module: Maximum tokens per leaf module """ # Ensure config directory exists try: @@ -133,6 +139,12 @@ def save( self._config.fallback_model = fallback_model if default_output is not None: self._config.default_output = default_output + if max_tokens is not None: + self._config.max_tokens = max_tokens + if max_token_per_module is not None: + self._config.max_token_per_module = max_token_per_module + if max_token_per_leaf_module is not None: + self._config.max_token_per_leaf_module = max_token_per_leaf_module # Validate configuration (only if base fields are set) if self._config.base_url and self._config.main_model and self._config.cluster_model: diff --git a/codewiki/cli/models/config.py b/codewiki/cli/models/config.py index 330ad8c5..f393fed7 100644 --- a/codewiki/cli/models/config.py +++ b/codewiki/cli/models/config.py @@ -113,6 +113,9 @@ class Configuration: cluster_model: Model for module clustering fallback_model: Fallback model for documentation 
generation default_output: Default output directory + max_tokens: Maximum tokens for LLM response (default: 32768) + max_token_per_module: Maximum tokens per module for clustering (default: 36369) + max_token_per_leaf_module: Maximum tokens per leaf module (default: 16000) agent_instructions: Custom agent instructions for documentation generation """ base_url: str @@ -120,6 +123,9 @@ class Configuration: cluster_model: str fallback_model: str = "glm-4p5" default_output: str = "docs" + max_tokens: int = 32768 + max_token_per_module: int = 36369 + max_token_per_leaf_module: int = 16000 agent_instructions: AgentInstructions = field(default_factory=AgentInstructions) def validate(self): @@ -141,6 +147,9 @@ def to_dict(self) -> dict: 'main_model': self.main_model, 'cluster_model': self.cluster_model, 'default_output': self.default_output, + 'max_tokens': self.max_tokens, + 'max_token_per_module': self.max_token_per_module, + 'max_token_per_leaf_module': self.max_token_per_leaf_module, } if self.agent_instructions and not self.agent_instructions.is_empty(): result['agent_instructions'] = self.agent_instructions.to_dict() @@ -167,6 +176,9 @@ def from_dict(cls, data: dict) -> 'Configuration': cluster_model=data.get('cluster_model', ''), fallback_model=data.get('fallback_model', 'glm-4p5'), default_output=data.get('default_output', 'docs'), + max_tokens=data.get('max_tokens', 32768), + max_token_per_module=data.get('max_token_per_module', 36369), + max_token_per_leaf_module=data.get('max_token_per_leaf_module', 16000), agent_instructions=agent_instructions, ) @@ -217,6 +229,9 @@ def to_backend_config(self, repo_path: str, output_dir: str, api_key: str, runti main_model=self.main_model, cluster_model=self.cluster_model, fallback_model=self.fallback_model, + max_tokens=self.max_tokens, + max_token_per_module=self.max_token_per_module, + max_token_per_leaf_module=self.max_token_per_leaf_module, agent_instructions=final_instructions.to_dict() if final_instructions else None ) 
diff --git a/codewiki/src/be/agent_tools/generate_sub_module_documentations.py b/codewiki/src/be/agent_tools/generate_sub_module_documentations.py index 8637bb6d..a40b3f42 100644 --- a/codewiki/src/be/agent_tools/generate_sub_module_documentations.py +++ b/codewiki/src/be/agent_tools/generate_sub_module_documentations.py @@ -7,7 +7,6 @@ from codewiki.src.be.prompt_template import SYSTEM_PROMPT, LEAF_SYSTEM_PROMPT, format_user_prompt from codewiki.src.be.utils import is_complex_module, count_tokens from codewiki.src.be.cluster_modules import format_potential_core_components -from codewiki.src.config import MAX_TOKEN_PER_LEAF_MODULE import logging logger = logging.getLogger(__name__) @@ -47,7 +46,7 @@ async def generate_sub_module_documentation( num_tokens = count_tokens(format_potential_core_components(core_component_ids, ctx.deps.components)[-1]) - if is_complex_module(ctx.deps.components, core_component_ids) and ctx.deps.current_depth < ctx.deps.max_depth and num_tokens >= MAX_TOKEN_PER_LEAF_MODULE: + if is_complex_module(ctx.deps.components, core_component_ids) and ctx.deps.current_depth < ctx.deps.max_depth and num_tokens >= ctx.deps.config.max_token_per_leaf_module: sub_agent = Agent( model=fallback_models, name=sub_module_name, diff --git a/codewiki/src/be/cluster_modules.py b/codewiki/src/be/cluster_modules.py index 23466854..580556c4 100644 --- a/codewiki/src/be/cluster_modules.py +++ b/codewiki/src/be/cluster_modules.py @@ -7,7 +7,7 @@ from codewiki.src.be.dependency_analyzer.models.core import Node from codewiki.src.be.llm_services import call_llm from codewiki.src.be.utils import count_tokens -from codewiki.src.config import MAX_TOKEN_PER_MODULE, Config +from codewiki.src.config import Config from codewiki.src.be.prompt_template import format_cluster_prompt @@ -54,7 +54,7 @@ def cluster_modules( """ potential_core_components, potential_core_components_with_code = format_potential_core_components(leaf_nodes, components) - if 
count_tokens(potential_core_components_with_code) <= MAX_TOKEN_PER_MODULE: + if count_tokens(potential_core_components_with_code) <= config.max_token_per_module: logger.debug(f"Skipping clustering for {current_module_name} because the potential core components are too few: {count_tokens(potential_core_components_with_code)} tokens") return {} diff --git a/codewiki/src/be/llm_services.py b/codewiki/src/be/llm_services.py index 4ff99dbb..0de98438 100644 --- a/codewiki/src/be/llm_services.py +++ b/codewiki/src/be/llm_services.py @@ -20,7 +20,7 @@ def create_main_model(config: Config) -> OpenAIModel: ), settings=OpenAIModelSettings( temperature=0.0, - max_tokens=32768 + max_tokens=config.max_tokens ) ) @@ -35,7 +35,7 @@ def create_fallback_model(config: Config) -> OpenAIModel: ), settings=OpenAIModelSettings( temperature=0.0, - max_tokens=32768 + max_tokens=config.max_tokens ) ) @@ -81,6 +81,6 @@ def call_llm( model=model, messages=[{"role": "user", "content": prompt}], temperature=temperature, - max_tokens=32768 + max_tokens=config.max_tokens ) return response.choices[0].message.content \ No newline at end of file diff --git a/codewiki/src/config.py b/codewiki/src/config.py index f37f7507..3a692faa 100644 --- a/codewiki/src/config.py +++ b/codewiki/src/config.py @@ -14,8 +14,13 @@ MODULE_TREE_FILENAME = 'module_tree.json' OVERVIEW_FILENAME = 'overview.md' MAX_DEPTH = 2 -MAX_TOKEN_PER_MODULE = 36_369 -MAX_TOKEN_PER_LEAF_MODULE = 16_000 +# Default max token settings +DEFAULT_MAX_TOKENS = 32_768 +DEFAULT_MAX_TOKEN_PER_MODULE = 36_369 +DEFAULT_MAX_TOKEN_PER_LEAF_MODULE = 16_000 +# Legacy constants (for backward compatibility) +MAX_TOKEN_PER_MODULE = DEFAULT_MAX_TOKEN_PER_MODULE +MAX_TOKEN_PER_LEAF_MODULE = DEFAULT_MAX_TOKEN_PER_LEAF_MODULE # CLI context detection _CLI_CONTEXT = False @@ -52,6 +57,10 @@ class Config: main_model: str cluster_model: str fallback_model: str = FALLBACK_MODEL_1 + # Max token settings + max_tokens: int = DEFAULT_MAX_TOKENS + 
max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE + max_token_per_leaf_module: int = DEFAULT_MAX_TOKEN_PER_LEAF_MODULE # Agent instructions for customization agent_instructions: Optional[Dict[str, Any]] = None @@ -146,6 +155,9 @@ def from_cli( main_model: str, cluster_model: str, fallback_model: str = FALLBACK_MODEL_1, + max_tokens: int = DEFAULT_MAX_TOKENS, + max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE, + max_token_per_leaf_module: int = DEFAULT_MAX_TOKEN_PER_LEAF_MODULE, agent_instructions: Optional[Dict[str, Any]] = None ) -> 'Config': """ @@ -159,6 +171,9 @@ def from_cli( main_model: Primary model cluster_model: Clustering model fallback_model: Fallback model + max_tokens: Maximum tokens for LLM response + max_token_per_module: Maximum tokens per module for clustering + max_token_per_leaf_module: Maximum tokens per leaf module agent_instructions: Custom agent instructions dict Returns: @@ -178,5 +193,8 @@ def from_cli( main_model=main_model, cluster_model=cluster_model, fallback_model=fallback_model, + max_tokens=max_tokens, + max_token_per_module=max_token_per_module, + max_token_per_leaf_module=max_token_per_leaf_module, agent_instructions=agent_instructions ) \ No newline at end of file