From 0abef8ade222383c4c72e33af0c25b3890d25790 Mon Sep 17 00:00:00 2001 From: Joe S Date: Mon, 20 Oct 2025 11:47:26 -0700 Subject: [PATCH 1/5] implement list_tables pagination --- mcp_clickhouse/__init__.py | 8 ++ mcp_clickhouse/mcp_server.py | 265 +++++++++++++++++++++++++++++++---- pyproject.toml | 1 + tests/test_mcp_server.py | 50 ++++--- tests/test_pagination.py | 258 ++++++++++++++++++++++++++++++++++ tests/test_tool.py | 24 ++-- uv.lock | 11 ++ 7 files changed, 564 insertions(+), 53 deletions(-) create mode 100644 tests/test_pagination.py diff --git a/mcp_clickhouse/__init__.py b/mcp_clickhouse/__init__.py index c442d9c..b30cb97 100644 --- a/mcp_clickhouse/__init__.py +++ b/mcp_clickhouse/__init__.py @@ -8,6 +8,10 @@ create_chdb_client, run_chdb_select_query, chdb_initial_prompt, + table_pagination_cache, + fetch_table_names_from_system, + get_paginated_table_data, + create_page_token, ) @@ -26,4 +30,8 @@ "create_chdb_client", "run_chdb_select_query", "chdb_initial_prompt", + "table_pagination_cache", + "fetch_table_names_from_system", + "get_paginated_table_data", + "create_page_token", ] diff --git a/mcp_clickhouse/mcp_server.py b/mcp_clickhouse/mcp_server.py index 01639b2..452d1f9 100644 --- a/mcp_clickhouse/mcp_server.py +++ b/mcp_clickhouse/mcp_server.py @@ -1,24 +1,26 @@ -import logging -import json -from typing import Optional, List, Any -import concurrent.futures import atexit +import concurrent.futures +import json +import logging import os +import uuid +from dataclasses import asdict, dataclass, field, is_dataclass +from typing import Any, Dict, List, Optional -import clickhouse_connect import chdb.session as chs +import clickhouse_connect +from cachetools import TTLCache from clickhouse_connect.driver.binding import format_query_value from dotenv import load_dotenv from fastmcp import FastMCP -from fastmcp.tools import Tool -from fastmcp.prompts import Prompt from fastmcp.exceptions import ToolError -from dataclasses import dataclass, field, asdict, is_dataclass +from fastmcp.prompts import Prompt +from fastmcp.tools import Tool from starlette.requests import Request from starlette.responses import PlainTextResponse -from mcp_clickhouse.mcp_env import get_config, get_chdb_config, get_mcp_config from mcp_clickhouse.chdb_prompt import CHDB_PROMPT +from mcp_clickhouse.mcp_env import get_chdb_config, get_config, get_mcp_config @dataclass @@ -135,12 +137,29 @@ def list_databases(): return json.dumps(databases) -def list_tables(database: str, like: Optional[str] = None, not_like: Optional[str] = None): - """List available ClickHouse tables in a database, including schema, comment, - row count, and column count.""" - logger.info(f"Listing tables in database '{database}'") - client = create_clickhouse_client() - query = f"SELECT database, name, engine, create_table_query, dependencies_database, dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes, total_bytes_uncompressed, parts, active_parts, total_marks, comment FROM system.tables WHERE database = {format_query_value(database)}" +# Store pagination state for list_tables with 1-hour expiry +# Using TTLCache from cachetools to automatically expire entries after 1 hour +table_pagination_cache: TTLCache = TTLCache(maxsize=100, ttl=3600) # 3600 seconds = 1 hour + + +def fetch_table_names_from_system( + client, + database: str, + like: Optional[str] = None, + not_like: Optional[str] = None, +) -> List[str]: + """Get list of table names from system.tables. 
+ + Args: + client: ClickHouse client + database: Database name + like: Optional pattern to filter table names (LIKE) + not_like: Optional pattern to filter out table names (NOT LIKE) + + Returns: + List of table names + """ + query = f"SELECT name FROM system.tables WHERE database = {format_query_value(database)}" if like: query += f" AND name LIKE {format_query_value(like)}" @@ -148,23 +167,219 @@ def list_tables(database: str, like: Optional[str] = None, not_like: Optional[st query += f" AND name NOT LIKE {format_query_value(not_like)}" result = client.query(query) + table_names = [row[0] for row in result.result_rows] + return table_names + - # Deserialize result as Table dataclass instances +def get_paginated_table_data( + client, + database: str, + table_names: List[str], + start_idx: int, + page_size: int, + include_detailed_columns: bool = True, +) -> tuple[List[Table], int, bool]: + """Get detailed information for a page of tables. + + Args: + client: ClickHouse client + database: Database name + table_names: List of all table names to paginate + start_idx: Starting index for pagination + page_size: Number of tables per page + like: Optional LIKE filter + not_like: Optional NOT LIKE filter + include_detailed_columns: Whether to include detailed column metadata (default: True) + + Returns: + Tuple of (list of Table objects, end index, has more pages) + """ + end_idx = min(start_idx + page_size, len(table_names)) + current_page_table_names = table_names[start_idx:end_idx] + + query = f""" + SELECT database, name, engine, create_table_query, dependencies_database, + dependencies_table, engine_full, sorting_key, primary_key, total_rows, + total_bytes, total_bytes_uncompressed, parts, active_parts, total_marks, comment + FROM system.tables + WHERE database = {format_query_value(database)} + AND name IN ({", ".join(format_query_value(name) for name in current_page_table_names)}) + """ + + result = client.query(query) tables = result_to_table(result.column_names, result.result_rows) - for table in tables: - column_data_query = f"SELECT database, table, name, type AS column_type, default_kind, default_expression, comment FROM system.columns WHERE database = {format_query_value(database)} AND table = {format_query_value(table.name)}" - column_data_query_result = client.query(column_data_query) - table.columns = [ - c - for c in result_to_column( + if include_detailed_columns: + for table in tables: + column_data_query = f""" + SELECT database, table, name, type AS column_type, default_kind, default_expression, comment + FROM system.columns + WHERE database = {format_query_value(database)} + AND table = {format_query_value(table.name)} + """ + column_data_query_result = client.query(column_data_query) + table.columns = result_to_column( column_data_query_result.column_names, column_data_query_result.result_rows, ) - ] + else: + for table in tables: + table.columns = [] + + return tables, end_idx, end_idx < len(table_names) + + +def create_page_token( + database: str, + like: Optional[str], + not_like: Optional[str], + table_names: List[str], + end_idx: int, + include_detailed_columns: bool, +) -> str: + """Create a new page token and store it in the cache. 
+ + Args: + database: Database name + like: LIKE pattern used to filter tables + not_like: NOT LIKE pattern used to filter tables + table_names: List of all table names + end_idx: Index to start from for the next page + include_detailed_columns: Whether to include detailed column metadata + + Returns: + New page token + """ + token = str(uuid.uuid4()) + table_pagination_cache[token] = { + "database": database, + "like": like, + "not_like": not_like, + "table_names": table_names, + "start_idx": end_idx, + "include_detailed_columns": include_detailed_columns, + } + return token + + +def list_tables( + database: str, + like: Optional[str] = None, + not_like: Optional[str] = None, + page_token: Optional[str] = None, + page_size: int = 50, + include_detailed_columns: bool = True, +) -> Dict[str, Any]: + """List available ClickHouse tables in a database, including schema, comment, + row count, and column count. + + Args: + database: The database to list tables from + like: Optional LIKE pattern to filter table names + not_like: Optional NOT LIKE pattern to exclude table names + page_token: Token for pagination, obtained from a previous call + page_size: Number of tables to return per page (default: 50) + include_detailed_columns: Whether to include detailed column metadata (default: True). + When False, the columns array will be empty but create_table_query still contains + all column information. This reduces payload size for large schemas. + + Returns: + A dictionary containing: + - tables: List of table information (as dictionaries) + - next_page_token: Token for the next page, or None if no more pages + - total_tables: Total number of tables matching the filters + """ + logger.info( + "Listing tables in database '%s' with like=%s, not_like=%s, " + "page_token=%s, page_size=%s, include_detailed_columns=%s", + database, + like, + not_like, + page_token, + page_size, + include_detailed_columns, + ) + client = create_clickhouse_client() + + if page_token and page_token in table_pagination_cache: + cached_state = table_pagination_cache[page_token] + cached_include_detailed = cached_state.get("include_detailed_columns", True) + + if ( + cached_state["database"] != database + or cached_state["like"] != like + or cached_state["not_like"] != not_like + or cached_include_detailed != include_detailed_columns + ): + logger.warning( + "Page token %s is for a different database, filter, or metadata setting. 
" + "Ignoring token and starting from beginning.", + page_token, + ) + page_token = None + else: + table_names = cached_state["table_names"] + start_idx = cached_state["start_idx"] + + tables, end_idx, has_more = get_paginated_table_data( + client, + database, + table_names, + start_idx, + page_size, + include_detailed_columns, + ) + + next_page_token = None + if has_more: + next_page_token = create_page_token( + database, like, not_like, table_names, end_idx, include_detailed_columns + ) + + del table_pagination_cache[page_token] + + logger.info( + "Returned page with %s tables (total: %s), next_page_token=%s", + len(tables), + len(table_names), + next_page_token, + ) + return { + "tables": [asdict(table) for table in tables], + "next_page_token": next_page_token, + "total_tables": len(table_names), + } + + table_names = fetch_table_names_from_system(client, database, like, not_like) + + start_idx = 0 + tables, end_idx, has_more = get_paginated_table_data( + client, + database, + table_names, + start_idx, + page_size, + include_detailed_columns, + ) + + next_page_token = None + if has_more: + next_page_token = create_page_token( + database, like, not_like, table_names, end_idx, include_detailed_columns + ) + + logger.info( + "Found %s tables, returning %s with next_page_token=%s", + len(table_names), + len(tables), + next_page_token, + ) - logger.info(f"Found {len(tables)} tables") - return [asdict(table) for table in tables] + return { + "tables": [asdict(table) for table in tables], + "next_page_token": next_page_token, + "total_tables": len(table_names), + } def execute_query(query: str): diff --git a/pyproject.toml b/pyproject.toml index ca7a038..9c0fe21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ dependencies = [ "clickhouse-connect>=0.8.16", "truststore>=0.10", "chdb>=3.3.0", + "cachetools>=5.5.0", ] [project.scripts] diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 28c4420..f811ecd 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -114,10 +114,18 @@ async def test_list_tables_basic(mcp_server, setup_test_database): result = await client.call_tool("list_tables", {"database": test_db}) assert len(result.content) >= 1 - tables = json.loads(result.content[0].text) + response = json.loads(result.content[0].text) + + assert isinstance(response, dict) + assert "tables" in response + assert "next_page_token" in response + assert "total_tables" in response + + tables = response["tables"] # Should have exactly 2 tables assert len(tables) == 2 + assert response["total_tables"] == 2 # Get table names table_names = [table["name"] for table in tables] @@ -149,14 +157,12 @@ async def test_list_tables_with_like_filter(mcp_server, setup_test_database): # Test with LIKE filter result = await client.call_tool("list_tables", {"database": test_db, "like": "test_%"}) - tables_data = json.loads(result.content[0].text) + response = json.loads(result.content[0].text) - # Handle both single dict and list of dicts - if isinstance(tables_data, dict): - tables = [tables_data] - else: - tables = tables_data + assert isinstance(response, dict) + assert "tables" in response + tables = response["tables"] assert len(tables) == 1 assert tables[0]["name"] == test_table @@ -170,14 +176,12 @@ async def test_list_tables_with_not_like_filter(mcp_server, setup_test_database) # Test with NOT LIKE filter result = await client.call_tool("list_tables", {"database": test_db, "not_like": "test_%"}) - tables_data = json.loads(result.content[0].text) + response = 
json.loads(result.content[0].text) - # Handle both single dict and list of dicts - if isinstance(tables_data, dict): - tables = [tables_data] - else: - tables = tables_data + assert isinstance(response, dict) + assert "tables" in response + tables = response["tables"] assert len(tables) == 1 assert tables[0]["name"] == test_table2 @@ -286,7 +290,11 @@ async def test_table_metadata_details(mcp_server, setup_test_database): async with Client(mcp_server) as client: result = await client.call_tool("list_tables", {"database": test_db}) - tables = json.loads(result.content[0].text) + response = json.loads(result.content[0].text) + + assert isinstance(response, dict) + assert "tables" in response + tables = response["tables"] # Find our test table test_table_info = next(t for t in tables if t["name"] == test_table) @@ -322,12 +330,16 @@ async def test_table_metadata_details(mcp_server, setup_test_database): async def test_system_database_access(mcp_server): """Test that we can access system databases.""" async with Client(mcp_server) as client: - # List tables in system database - result = await client.call_tool("list_tables", {"database": "system"}) - tables = json.loads(result.content[0].text) + # List tables in system database with larger page size + result = await client.call_tool("list_tables", {"database": "system", "page_size": 100}) + response = json.loads(result.content[0].text) + + assert isinstance(response, dict) + assert "tables" in response + assert "total_tables" in response + tables = response["tables"] - # System database should have many tables - assert len(tables) > 10 + assert response["total_tables"] > 10 # Check for some common system tables table_names = [t["name"] for t in tables] diff --git a/tests/test_pagination.py b/tests/test_pagination.py new file mode 100644 index 0000000..547f41c --- /dev/null +++ b/tests/test_pagination.py @@ -0,0 +1,258 @@ +import unittest + +from dotenv import load_dotenv + +from mcp_clickhouse import ( + create_clickhouse_client, + create_page_token, + fetch_table_names_from_system, + get_paginated_table_data, + list_tables, + table_pagination_cache, +) +from mcp_clickhouse.mcp_server import Table + +load_dotenv() + + +class TestPagination(unittest.TestCase): + @classmethod + def setUpClass(cls): + """Set up the environment before tests.""" + cls.client = create_clickhouse_client() + + cls.test_db = "test_pagination_db" + cls.client.command(f"CREATE DATABASE IF NOT EXISTS {cls.test_db}") + + for i in range(1, 11): + table_name = f"test_table_{i}" + cls.client.command(f"DROP TABLE IF EXISTS {cls.test_db}.{table_name}") + + cls.client.command(f""" + CREATE TABLE {cls.test_db}.{table_name} ( + id UInt32 COMMENT 'ID field {i}', + name String COMMENT 'Name field {i}' + ) ENGINE = MergeTree() + ORDER BY id + COMMENT 'Test table {i} for pagination testing' + """) + cls.client.command(f""" + INSERT INTO {cls.test_db}.{table_name} (id, name) VALUES ({i}, 'Test {i}') + """) + + @classmethod + def tearDownClass(cls): + """Clean up the environment after tests.""" + cls.client.command(f"DROP DATABASE IF EXISTS {cls.test_db}") + + def test_list_tables_pagination(self): + """Test that list_tables returns paginated results.""" + result = list_tables(self.test_db, page_size=3) + self.assertIsInstance(result, dict) + self.assertIn("tables", result) + self.assertIn("next_page_token", result) + self.assertIn("total_tables", result) + self.assertEqual(len(result["tables"]), 3) + self.assertIsNotNone(result["next_page_token"]) + 
self.assertEqual(result["total_tables"], 10) + + page_token = result["next_page_token"] + result2 = list_tables(self.test_db, page_token=page_token, page_size=3) + self.assertEqual(len(result2["tables"]), 3) + self.assertIsNotNone(result2["next_page_token"]) + + page1_table_names = {table["name"] for table in result["tables"]} + page2_table_names = {table["name"] for table in result2["tables"]} + self.assertEqual(len(page1_table_names.intersection(page2_table_names)), 0) + + page_token = result2["next_page_token"] + result3 = list_tables(self.test_db, page_token=page_token, page_size=3) + self.assertEqual(len(result3["tables"]), 3) + self.assertIsNotNone(result3["next_page_token"]) + + page_token = result3["next_page_token"] + result4 = list_tables(self.test_db, page_token=page_token, page_size=3) + self.assertEqual(len(result4["tables"]), 1) + self.assertIsNone(result4["next_page_token"]) + + def test_invalid_page_token(self): + """Test that list_tables handles invalid page tokens gracefully.""" + result = list_tables(self.test_db, page_token="invalid_token", page_size=3) + self.assertIsInstance(result, dict) + self.assertIn("tables", result) + self.assertIn("next_page_token", result) + self.assertEqual(len(result["tables"]), 3) + + def test_token_for_different_database(self): + """Test handling a token for a different database.""" + result = list_tables(self.test_db, page_size=3) + page_token = result["next_page_token"] + test_db2 = "test_pagination_db2" + try: + self.client.command(f"CREATE DATABASE IF NOT EXISTS {test_db2}") + self.client.command(f""" + CREATE TABLE {test_db2}.test_table ( + id UInt32, + name String + ) ENGINE = MergeTree() + ORDER BY id + """) + + result2 = list_tables(test_db2, page_token=page_token, page_size=3) + self.assertIsInstance(result2, dict) + self.assertIn("tables", result2) + finally: + self.client.command(f"DROP DATABASE IF EXISTS {test_db2}") + + def test_different_page_sizes(self): + """Test pagination with different page sizes.""" + result = list_tables(self.test_db, page_size=20) + self.assertEqual(len(result["tables"]), 10) + self.assertIsNone(result["next_page_token"]) + + result = list_tables(self.test_db, page_size=5) + self.assertEqual(len(result["tables"]), 5) + self.assertIsNotNone(result["next_page_token"]) + + page_token = result["next_page_token"] + result2 = list_tables(self.test_db, page_token=page_token, page_size=5) + self.assertEqual(len(result2["tables"]), 5) + self.assertIsNone(result2["next_page_token"]) + + def test_page_token_expiry(self): + """Test that page tokens expire after their TTL.""" + result = list_tables(self.test_db, page_size=3) + page_token = result["next_page_token"] + + self.assertIn(page_token, table_pagination_cache) + + # For this test manually remove the token from the cache to simulate expiration + # since we can't easily wait for the actual TTL (1 hour) to expire + if page_token in table_pagination_cache: + del table_pagination_cache[page_token] + + # Try to use the expired token + result2 = list_tables(self.test_db, page_token=page_token, page_size=3) + # Should fall back to first page + self.assertEqual(len(result2["tables"]), 3) + self.assertIsNotNone(result2["next_page_token"]) + + def test_helper_functions(self): + """Test the individual helper functions used for pagination.""" + client = create_clickhouse_client() + + table_names = fetch_table_names_from_system(client, self.test_db) + self.assertEqual(len(table_names), 10) + for i in range(1, 11): + self.assertIn(f"test_table_{i}", table_names) + + 
tables, end_idx, has_more = get_paginated_table_data( + client, self.test_db, table_names, 0, 3 + ) + self.assertEqual(len(tables), 3) + self.assertEqual(end_idx, 3) + self.assertTrue(has_more) + + for table in tables: + self.assertIsInstance(table, Table) + self.assertEqual(table.database, self.test_db) + self.assertIsInstance(table.columns, list) + + token = create_page_token(self.test_db, None, None, table_names, 3, True) + self.assertIn(token, table_pagination_cache) + cached_state = table_pagination_cache[token] + self.assertEqual(cached_state["database"], self.test_db) + self.assertEqual(cached_state["start_idx"], 3) + self.assertEqual(cached_state["table_names"], table_names) + self.assertEqual(cached_state["include_detailed_columns"], True) + + def test_filters_with_pagination(self): + """Test pagination with LIKE and NOT LIKE filters.""" + result = list_tables(self.test_db, like="test_table_%", page_size=5) + self.assertEqual(len(result["tables"]), 5) + self.assertIsNotNone(result["next_page_token"]) + + result2 = list_tables( + self.test_db, like="test_table_%", page_token=result["next_page_token"], page_size=5 + ) + self.assertEqual(len(result2["tables"]), 5) + self.assertIsNone(result2["next_page_token"]) + + result3 = list_tables(self.test_db, not_like="test_table_1%", page_size=10) + self.assertEqual(len(result3["tables"]), 8) + self.assertIsNone(result3["next_page_token"]) + + def test_metadata_trimming(self): + """Test that include_detailed_columns parameter works correctly.""" + result_with_columns = list_tables(self.test_db, page_size=3, include_detailed_columns=True) + self.assertIsInstance(result_with_columns, dict) + self.assertIn("tables", result_with_columns) + + tables_with_columns = result_with_columns["tables"] + self.assertEqual(len(tables_with_columns), 3) + + for table in tables_with_columns: + self.assertIn("columns", table) + self.assertIsInstance(table["columns"], list) + self.assertGreater(len(table["columns"]), 0) + for col in table["columns"]: + self.assertIn("name", col) + self.assertIn("column_type", col) + + result_without_columns = list_tables( + self.test_db, page_size=3, include_detailed_columns=False + ) + self.assertIsInstance(result_without_columns, dict) + self.assertIn("tables", result_without_columns) + + tables_without_columns = result_without_columns["tables"] + self.assertEqual(len(tables_without_columns), 3) + + for table in tables_without_columns: + self.assertIn("columns", table) + self.assertIsInstance(table["columns"], list) + self.assertEqual(len(table["columns"]), 0) + self.assertIn("create_table_query", table) + self.assertIsInstance(table["create_table_query"], str) + self.assertGreater(len(table["create_table_query"]), 0) + + def test_metadata_trimming_with_pagination(self): + """Test that metadata trimming works across multiple pages.""" + result1 = list_tables(self.test_db, page_size=3, include_detailed_columns=False) + self.assertEqual(len(result1["tables"]), 3) + self.assertIsNotNone(result1["next_page_token"]) + + for table in result1["tables"]: + self.assertEqual(len(table["columns"]), 0) + + result2 = list_tables( + self.test_db, + page_token=result1["next_page_token"], + page_size=3, + include_detailed_columns=False, + ) + self.assertEqual(len(result2["tables"]), 3) + + for table in result2["tables"]: + self.assertEqual(len(table["columns"]), 0) + + def test_metadata_setting_mismatch_resets_pagination(self): + """Test that changing include_detailed_columns invalidates page token.""" + result1 = list_tables(self.test_db, 
page_size=3, include_detailed_columns=True) + page_token = result1["next_page_token"] + + result2 = list_tables( + self.test_db, + page_token=page_token, + page_size=3, + include_detailed_columns=False, + ) + + self.assertEqual(len(result2["tables"]), 3) + table_names_1 = [t["name"] for t in result1["tables"]] + table_names_2 = [t["name"] for t in result2["tables"]] + self.assertEqual(table_names_1, table_names_2) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_tool.py b/tests/test_tool.py index 50878c4..bfc5515 100644 --- a/tests/test_tool.py +++ b/tests/test_tool.py @@ -51,16 +51,20 @@ def test_list_databases(self): def test_list_tables_without_like(self): """Test listing tables without a 'LIKE' filter.""" result = list_tables(self.test_db) - self.assertIsInstance(result, list) - self.assertEqual(len(result), 1) - self.assertEqual(result[0]["name"], self.test_table) + self.assertIsInstance(result, dict) + self.assertIn("tables", result) + tables = result["tables"] + self.assertEqual(len(tables), 1) + self.assertEqual(tables[0]["name"], self.test_table) def test_list_tables_with_like(self): """Test listing tables with a 'LIKE' filter.""" result = list_tables(self.test_db, like=f"{self.test_table}%") - self.assertIsInstance(result, list) - self.assertEqual(len(result), 1) - self.assertEqual(result[0]["name"], self.test_table) + self.assertIsInstance(result, dict) + self.assertIn("tables", result) + tables = result["tables"] + self.assertEqual(len(tables), 1) + self.assertEqual(tables[0]["name"], self.test_table) def test_run_select_query_success(self): """Test running a SELECT query successfully.""" @@ -84,10 +88,12 @@ def test_run_select_query_failure(self): def test_table_and_column_comments(self): """Test that table and column comments are correctly retrieved.""" result = list_tables(self.test_db) - self.assertIsInstance(result, list) - self.assertEqual(len(result), 1) + self.assertIsInstance(result, dict) + self.assertIn("tables", result) + tables = result["tables"] + self.assertEqual(len(tables), 1) - table_info = result[0] + table_info = tables[0] # Verify table comment self.assertEqual(table_info["comment"], "Test table for unit testing") diff --git a/uv.lock b/uv.lock index fdef9cb..71dd004 100644 --- a/uv.lock +++ b/uv.lock @@ -61,6 +61,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" }, ] +[[package]] +name = "cachetools" +version = "6.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/7e/b975b5814bd36faf009faebe22c1072a1fa1168db34d285ef0ba071ad78c/cachetools-6.2.1.tar.gz", hash = "sha256:3f391e4bd8f8bf0931169baf7456cc822705f4e2a31f840d218f445b9a854201", size = 31325, upload-time = "2025-10-12T14:55:30.139Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl", hash = "sha256:09868944b6dde876dfd44e1d47e18484541eaf12f26f29b7af91b26cc892d701", size = 11280, upload-time = "2025-10-12T14:55:28.382Z" }, +] + [[package]] name = "certifi" version = "2025.10.5" @@ -792,6 +801,7 @@ name = "mcp-clickhouse" version = "0.1.12" source = { editable = "." 
} dependencies = [ + { name = "cachetools" }, { name = "chdb" }, { name = "clickhouse-connect" }, { name = "fastmcp" }, @@ -808,6 +818,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "cachetools", specifier = ">=5.5.0" }, { name = "chdb", specifier = ">=3.3.0" }, { name = "clickhouse-connect", specifier = ">=0.8.16" }, { name = "fastmcp", specifier = ">=2.0.0" }, From 765ddea90d15d54741a2963541216fdbba8dd73c Mon Sep 17 00:00:00 2001 From: Joe S Date: Mon, 20 Oct 2025 11:54:29 -0700 Subject: [PATCH 2/5] fix empty table bug --- mcp_clickhouse/mcp_server.py | 3 +++ tests/test_tool.py | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/mcp_clickhouse/mcp_server.py b/mcp_clickhouse/mcp_server.py index 452d1f9..a37b4dc 100644 --- a/mcp_clickhouse/mcp_server.py +++ b/mcp_clickhouse/mcp_server.py @@ -197,6 +197,9 @@ def get_paginated_table_data( end_idx = min(start_idx + page_size, len(table_names)) current_page_table_names = table_names[start_idx:end_idx] + if not current_page_table_names: + return [], end_idx, False + query = f""" SELECT database, name, engine, create_table_query, dependencies_database, dependencies_table, engine_full, sorting_key, primary_key, total_rows, diff --git a/tests/test_tool.py b/tests/test_tool.py index bfc5515..e9f0f33 100644 --- a/tests/test_tool.py +++ b/tests/test_tool.py @@ -104,6 +104,31 @@ def test_table_and_column_comments(self): self.assertEqual(columns["id"]["comment"], "Primary identifier") self.assertEqual(columns["name"]["comment"], "User name field") + def test_list_tables_empty_database(self): + """Test listing tables in an empty database returns empty list without errors.""" + empty_db = "test_empty_db" + + self.client.command(f"CREATE DATABASE IF NOT EXISTS {empty_db}") + + try: + result = list_tables(empty_db) + self.assertIsInstance(result, dict) + self.assertIn("tables", result) + self.assertEqual(len(result["tables"]), 0) + self.assertEqual(result["total_tables"], 0) + self.assertIsNone(result["next_page_token"]) + finally: + self.client.command(f"DROP DATABASE IF EXISTS {empty_db}") + + def test_list_tables_with_not_like_filter_excluding_all(self): + """Test listing tables with a NOT LIKE filter that excludes all tables.""" + result = list_tables(self.test_db, not_like="%") + self.assertIsInstance(result, dict) + self.assertIn("tables", result) + self.assertEqual(len(result["tables"]), 0) + self.assertEqual(result["total_tables"], 0) + self.assertIsNone(result["next_page_token"]) + if __name__ == "__main__": unittest.main() From 693092874cea4c4fbfcf0501442a9f31bfef0271 Mon Sep 17 00:00:00 2001 From: Joe S Date: Mon, 20 Oct 2025 12:30:05 -0700 Subject: [PATCH 3/5] update readme --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3db4b9e..aab9de7 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,17 @@ An MCP server for ClickHouse. * List all databases on your ClickHouse cluster. * `list_tables` - * List all tables in a database. - * Input: `database` (string): The name of the database. + * List tables in a database with pagination. + * Required input: `database` (string). + * Optional inputs: + * `like` / `not_like` (string): Apply `LIKE` or `NOT LIKE` filters to table names. + * `page_token` (string): Token returned by a previous call for fetching the next page. + * `page_size` (int, default `50`): Number of tables returned per page. 
+ * `include_detailed_columns` (bool, default `true`): When `false`, omits column metadata for lighter responses while keeping the full `create_table_query`. + * Response shape: + * `tables`: Array of table objects for the current page. + * `next_page_token`: Pass this value back to fetch the next page, or `null` when there are no more tables. + * `total_tables`: Total count of tables that match the supplied filters. ### chDB Tools From a56a370b29ac2b8d2d4b63686a1e58ed21d3b513 Mon Sep 17 00:00:00 2001 From: Joe S Date: Mon, 20 Oct 2025 13:04:31 -0700 Subject: [PATCH 4/5] reset import order --- mcp_clickhouse/mcp_server.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mcp_clickhouse/mcp_server.py b/mcp_clickhouse/mcp_server.py index a37b4dc..4ba71ab 100644 --- a/mcp_clickhouse/mcp_server.py +++ b/mcp_clickhouse/mcp_server.py @@ -1,26 +1,26 @@ -import atexit -import concurrent.futures -import json import logging +import json +from typing import Optional, List, Any, Dict +import concurrent.futures +import atexit import os import uuid -from dataclasses import asdict, dataclass, field, is_dataclass -from typing import Any, Dict, List, Optional -import chdb.session as chs import clickhouse_connect -from cachetools import TTLCache +import chdb.session as chs from clickhouse_connect.driver.binding import format_query_value from dotenv import load_dotenv from fastmcp import FastMCP -from fastmcp.exceptions import ToolError -from fastmcp.prompts import Prompt +from cachetools import TTLCache from fastmcp.tools import Tool +from fastmcp.prompts import Prompt +from fastmcp.exceptions import ToolError +from dataclasses import dataclass, field, asdict, is_dataclass from starlette.requests import Request from starlette.responses import PlainTextResponse +from mcp_clickhouse.mcp_env import get_config, get_chdb_config, get_mcp_config from mcp_clickhouse.chdb_prompt import CHDB_PROMPT -from mcp_clickhouse.mcp_env import get_chdb_config, get_config, get_mcp_config @dataclass From 57a5bedcf915c1f1207a99b6c052a29d976e95a8 Mon Sep 17 00:00:00 2001 From: Joe S Date: Tue, 21 Oct 2025 08:52:55 -0700 Subject: [PATCH 5/5] remove unused docstring args --- mcp_clickhouse/mcp_server.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mcp_clickhouse/mcp_server.py b/mcp_clickhouse/mcp_server.py index 4ba71ab..e90c2b1 100644 --- a/mcp_clickhouse/mcp_server.py +++ b/mcp_clickhouse/mcp_server.py @@ -187,8 +187,6 @@ def get_paginated_table_data( table_names: List of all table names to paginate start_idx: Starting index for pagination page_size: Number of tables per page - like: Optional LIKE filter - not_like: Optional NOT LIKE filter include_detailed_columns: Whether to include detailed column metadata (default: True) Returns:
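
Usage sketch (not part of the patch series): a minimal client-side loop that
drains every page of the new cursor-style list_tables API. The database name
"default" is a placeholder; the rest follows the contract introduced above,
where list_tables returns a dict with "tables", "next_page_token", and
"total_tables", and a page token expires after one hour or after one use.

    from mcp_clickhouse import list_tables

    # page_token=None requests the first page; each response carries the
    # cursor for the next one. include_detailed_columns=False keeps the
    # payload small while create_table_query still holds the full schema.
    page_token = None
    tables = []
    while True:
        page = list_tables(
            "default",  # placeholder database name
            page_size=50,
            page_token=page_token,
            include_detailed_columns=False,
        )
        tables.extend(page["tables"])
        page_token = page["next_page_token"]
        if page_token is None:  # last page reached
            break

    print(f"fetched {len(tables)} of {page['total_tables']} tables")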