69 changes: 45 additions & 24 deletions ai/analyzer.py
@@ -15,14 +15,14 @@
load_sqlite_vector_extension as _load_sqlite_vector_extension,
ensure_chunks_and_meta as _ensure_chunks_and_meta,
insert_chunk_vector_with_retry as _insert_chunk_vector_with_retry,
search_vectors as _search_vectors,
get_chunk_text as _get_chunk_text,
)
from .openai import call_coding_api, EmbeddingClient
from .openai import call_coding_api
from .llama_embeddings import OpenAICompatibleEmbedding
from .llama_chunker import chunk_with_llama_index
from llama_index.core import Document
from utils.logger import get_logger
from utils import compute_file_hash, chunk_text, norm, cosine
from .smart_chunker import smart_chunk
from utils import compute_file_hash, norm, cosine
import logging

# reduce noise from httpx used by external libs
@@ -64,8 +64,8 @@

logger = get_logger(__name__)

# Initialize EmbeddingClient for structured logging and retry logic
_embedding_client = EmbeddingClient()
# Initialize llama-index embedding client
_embedding_client = OpenAICompatibleEmbedding()

# Thread-local storage to track execution state inside futures
_thread_state = threading.local()
@@ -86,7 +86,8 @@ def _get_embedding_with_semaphore(semaphore: threading.Semaphore, text: str, fil
semaphore.acquire()
try:
_thread_state.stage = "calling_embed_text"
result = _embedding_client.embed_text(text, file_path=file_path, chunk_index=chunk_index)
# Use llama-index embedding client
result = _embedding_client._get_text_embedding(text)
_thread_state.stage = "completed"
return result
except Exception as e:
@@ -171,14 +172,8 @@ def _process_file_sync(
if isinstance(cfg, dict):
embedding_model = cfg.get("embedding_model")

# Use smart chunking for supported code languages
use_smart_chunking = cfg.get("smart_chunking", True) if isinstance(cfg, dict) else True
supported_languages = ["python", "javascript", "typescript", "java", "go", "rust", "c", "cpp"]

if use_smart_chunking and lang in supported_languages:
chunks = smart_chunk(content, language=lang, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP)
else:
chunks = chunk_text(content, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP)
# Use llama-index chunking for all content
chunks = chunk_with_llama_index(content, language=lang, chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

if not chunks:
chunks = [content]
@@ -395,11 +390,13 @@ def analyze_local_path_sync(

try:
# Use batch update for efficiency - single database transaction
# Store total_files for performance (avoid re-scanning directory on every request)
set_project_metadata_batch(database_path, {
"last_indexed_at": time.strftime("%Y-%m-%d %H:%M:%S"),
"last_index_duration": str(duration),
"files_indexed": str(file_count),
"files_skipped": str(skipped_count)
"files_skipped": str(skipped_count),
"total_files": str(total_files) # Store total files found during indexing
})
except Exception:
logger.exception("Failed to store indexing metadata")
@@ -442,16 +439,40 @@ def analyze_local_path_background(local_path: str, database_path: str, venv_path

def search_semantic(query: str, database_path: str, top_k: int = 5):
"""
Uses sqlite-vector's vector_full_scan to retrieve best-matching chunks and returns
a list of {file_id, path, chunk_index, score}.
Uses llama-index with sqlite-vector backend to retrieve best-matching chunks.
Always includes content as it's needed for the coding model context.

Args:
query: Search query text
database_path: Path to the SQLite database
top_k: Number of results to return

Returns:
List of dicts with file_id, path, chunk_index, score, and content
"""
q_emb = _embedding_client.embed_text(query, file_path="<query>", chunk_index=0)
if not q_emb:
return []

try:
return _search_vectors(database_path, q_emb, top_k=top_k)
except Exception:
# Use llama-index for semantic search
from .llama_integration import llama_index_search

docs = llama_index_search(query, database_path, top_k=top_k)

results = []
for doc in docs:
metadata = doc.metadata or {}
result = {
"file_id": metadata.get("file_id", 0),
"path": metadata.get("path", ""),
"chunk_index": metadata.get("chunk_index", 0),
"score": metadata.get("score", 0.0),
"content": doc.text or "" # Always include content for LLM context
}
results.append(result)

logger.info(f"llama-index search returned {len(results)} results")
return results

except Exception as e:
logger.exception(f"Semantic search failed: {e}")
raise


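A minimal usage sketch of the rewritten search_semantic; the module path ai.analyzer, the database path, and the query are illustrative assumptions, and the database is assumed to have already been indexed by analyze_local_path_sync:

from ai.analyzer import search_semantic  # module path assumed from the repository layout

# Query an already-indexed project; every hit carries the chunk text for LLM context.
hits = search_semantic("where are chunk vectors inserted?", "./project_index.db", top_k=3)
for hit in hits:
    print(f"{hit['path']}#{hit['chunk_index']} score={hit['score']:.3f}")
    print(hit["content"][:200])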
111 changes: 111 additions & 0 deletions ai/llama_chunker.py
@@ -0,0 +1,111 @@
"""
LlamaIndex-based chunking for code and text.
Replaces smart_chunker.py with llama-index's built-in splitters.
"""
from typing import List
from llama_index.core.node_parser import CodeSplitter, SentenceSplitter
from llama_index.core.schema import Document

from utils.logger import get_logger

logger = get_logger(__name__)


def chunk_with_llama_index(
content: str,
language: str = "text",
chunk_size: int = 800,
chunk_overlap: int = 100
) -> List[str]:
"""
Chunk text or code using llama-index's splitters.

Args:
content: Text or code content to chunk
language: Programming language (python, javascript, etc.) or "text"
chunk_size: Target size for each chunk in characters
chunk_overlap: Overlap between chunks in characters

Returns:
List of text chunks
"""
# Map language names to llama-index language identifiers
language_map = {
"python": "python",
"javascript": "js",
"typescript": "ts",
"java": "java",
"go": "go",
"rust": "rust",
"c": "c",
"cpp": "cpp",
"c++": "cpp",
}

try:
# Check if it's a supported code language
llama_lang = language_map.get(language.lower())

if llama_lang:
# Use CodeSplitter for code
splitter = CodeSplitter(
language=llama_lang,
chunk_lines=40, # Target lines per chunk (approximation)
chunk_lines_overlap=5, # Overlap in lines
max_chars=chunk_size
)
logger.debug(f"Using CodeSplitter for language: {llama_lang}")
else:
# Use SentenceSplitter for text or unknown languages
splitter = SentenceSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
paragraph_separator="\n\n",
secondary_chunking_regex="[^,.;。?!]+[,.;。?!]?"
)
logger.debug(f"Using SentenceSplitter for language: {language}")

# Create a document and split it
doc = Document(text=content)
nodes = splitter.get_nodes_from_documents([doc])

# Extract text from nodes
chunks = [node.text for node in nodes if node.text]

logger.debug(f"Split content into {len(chunks)} chunks")
return chunks if chunks else [content]

except Exception as e:
logger.exception(f"Error chunking with llama-index: {e}")
# Fallback to simple chunking
return simple_chunk(content, chunk_size, chunk_overlap)


def simple_chunk(text: str, chunk_size: int = 800, chunk_overlap: int = 100) -> List[str]:
"""
Simple character-based chunking fallback.

Args:
text: Text to chunk
chunk_size: Size of each chunk
chunk_overlap: Overlap between chunks

Returns:
List of text chunks
"""
if not text:
return []

chunks = []
step = max(1, chunk_size - chunk_overlap)

for i in range(0, len(text), step):
end = min(i + chunk_size, len(text))
chunk = text[i:end]
if chunk.strip():
chunks.append(chunk)

if end >= len(text):
break

return chunks if chunks else [text]
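
A minimal sketch of how chunk_with_llama_index and the simple_chunk fallback are expected to be called; the file path is illustrative and the size parameters mirror the defaults above:

from ai.llama_chunker import chunk_with_llama_index, simple_chunk

with open("ai/analyzer.py", "r", encoding="utf-8") as fh:
    source = fh.read()

# Python source is routed to CodeSplitter; unmapped languages use SentenceSplitter.
code_chunks = chunk_with_llama_index(source, language="python", chunk_size=800, chunk_overlap=100)
text_chunks = chunk_with_llama_index("First paragraph.\n\nSecond paragraph.", language="text")

# Character-based fallback used when llama-index splitting raises.
fallback_chunks = simple_chunk(source, chunk_size=800, chunk_overlap=100)
print(len(code_chunks), len(text_chunks), len(fallback_chunks))

Note that CodeSplitter relies on a tree-sitter parser for the target language; if one is unavailable the splitter raises and the except branch above falls back to simple_chunk.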
100 changes: 100 additions & 0 deletions ai/llama_embeddings.py
@@ -0,0 +1,100 @@
"""
LlamaIndex-compatible embeddings using OpenAI API.
Replaces the custom EmbeddingClient with llama-index's embedding abstraction.
"""
from typing import List, Optional
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.bridge.pydantic import PrivateAttr
from openai import OpenAI

from utils.config import CFG
from utils.logger import get_logger

logger = get_logger(__name__)


class OpenAICompatibleEmbedding(BaseEmbedding):
"""
LlamaIndex-compatible embedding model using OpenAI-compatible API.
Works with any OpenAI-compatible endpoint (OpenAI, Azure, local servers, etc.)
"""

_client: OpenAI = PrivateAttr()
_model: str = PrivateAttr()

def __init__(
self,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
model: Optional[str] = None,
**kwargs
):
"""
Initialize the embedding model.

Args:
api_key: OpenAI API key (defaults to config)
api_base: API base URL (defaults to config)
model: Model name (defaults to config)
"""
super().__init__(**kwargs)

# Get config values
self._client = OpenAI(
api_key=api_key or CFG.get("api_key"),
base_url=api_base or CFG.get("api_url")
)
self._model = model or CFG.get("embedding_model") or "text-embedding-3-small"

logger.info(f"Initialized OpenAICompatibleEmbedding with model: {self._model}")

@classmethod
def class_name(cls) -> str:
return "OpenAICompatibleEmbedding"

async def _aget_query_embedding(self, query: str) -> List[float]:
"""Get query embedding asynchronously."""
return self._get_query_embedding(query)

async def _aget_text_embedding(self, text: str) -> List[float]:
"""Get text embedding asynchronously."""
return self._get_text_embedding(text)

def _get_query_embedding(self, query: str) -> List[float]:
"""Get embedding for a query."""
return self._get_text_embedding(query)

def _get_text_embedding(self, text: str) -> List[float]:
"""Get embedding for a text."""
try:
# Clean the text
text = text.replace("\n", " ").strip()
if not text:
logger.warning("Empty text provided for embedding")
return []

# Call OpenAI API
response = self._client.embeddings.create(
input=[text],
model=self._model
)

if response.data and len(response.data) > 0:
embedding = response.data[0].embedding
logger.debug(f"Generated embedding with dimension: {len(embedding)}")
return embedding
else:
logger.error("No embedding returned from API")
return []

except Exception as e:
logger.exception(f"Failed to generate embedding: {e}")
return []

def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
"""Get embeddings for multiple texts."""
embeddings = []
for text in texts:
embedding = self._get_text_embedding(text)
embeddings.append(embedding)
return embeddings
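
A minimal usage sketch for the new embedding class, assuming utils.config.CFG holds valid API credentials; the sample text and the printed dimension are illustrative:

from ai.llama_embeddings import OpenAICompatibleEmbedding

# With no arguments, api_key / api_url / embedding_model are read from CFG.
embedder = OpenAICompatibleEmbedding()

# Public BaseEmbedding entry point; analyzer.py calls the private _get_text_embedding directly.
vector = embedder.get_text_embedding("def add(a, b):\n    return a + b")
print(len(vector))  # e.g. 1536 for text-embedding-3-small

Because the class subclasses BaseEmbedding, it can also be assigned to llama_index.core.Settings.embed_model so that other llama-index components reuse the same endpoint.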