From f017e545485302fdd06d865844a4674a7d51c3fe Mon Sep 17 00:00:00 2001
From: Keyur Doshi <keyur.doshi@harness.io>
Date: Wed, 23 Jul 2025 13:50:59 +0530
Subject: [PATCH 1/2] Implemented vector index for chat history context and MCP
 tool for semantic search & summarization

---
 services/chatbot/src/chatbot/chat_service.py  | 10 ++--
 services/chatbot/src/chatbot/vector_index.py  | 45 +++++++++++++++++
 services/chatbot/src/mcpserver/server.py      | 25 ++++++++++
 .../chatbot/src/mcpserver/tool_helpers.py     | 48 +++++++++++++++++++
 4 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 services/chatbot/src/chatbot/vector_index.py
 create mode 100644 services/chatbot/src/mcpserver/tool_helpers.py

diff --git a/services/chatbot/src/chatbot/chat_service.py b/services/chatbot/src/chatbot/chat_service.py
index 41846c4f..58a5ec1f 100644
--- a/services/chatbot/src/chatbot/chat_service.py
+++ b/services/chatbot/src/chatbot/chat_service.py
@@ -1,9 +1,9 @@
 from uuid import uuid4
-
+import os
 from langgraph.graph.message import Messages
-
+from .vector_index import build_vector_index_from_chat_history, update_vector_index, retrieval_index_path
 from .extensions import db
-from .langgraph_agent import build_langgraph_agent, execute_langgraph_agent
+from .langgraph_agent import execute_langgraph_agent
 
 
 async def get_chat_history(session_id):
@@ -39,4 +39,8 @@ async def process_user_message(session_id, user_message, api_key, model_name, us
     # Limit chat history to last 20 messages
     history = history[-20:]
     await update_chat_history(session_id, history)
+    if not os.path.exists(retrieval_index_path):
+        await build_vector_index_from_chat_history(api_key)
+    else:
+        await update_vector_index(api_key, session_id, {"user": user_message, "assistant": reply.content})
     return reply.content, response_message_id
diff --git a/services/chatbot/src/chatbot/vector_index.py b/services/chatbot/src/chatbot/vector_index.py
new file mode 100644
index 00000000..0eeb4dff
--- /dev/null
+++ b/services/chatbot/src/chatbot/vector_index.py
@@ -0,0 +1,45 @@
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_core.documents import Document
+from .extensions import db
+
+retrieval_index_path = "/app/resources/chat_index"
+
+async def build_vector_index_from_chat_history(api_key):
+    docs = []
+    async for chat in db.chat_sessions.find({}):
+        session_id = chat.get("session_id", "unknown")
+        messages = chat.get("messages", [])
+        for msg in messages:
+            role = msg.get("role")
+            content = msg.get("content")
+            if content:
+                doc = Document(
+                    page_content=content,
+                    metadata={"session_id": session_id, "role": role}
+                )
+                docs.append(doc)
+
+    embeddings = OpenAIEmbeddings(api_key=api_key)
+    vectorstore = FAISS.from_documents(docs, embeddings)
+    vectorstore.save_local(retrieval_index_path)
+
+async def update_vector_index(api_key, session_id, new_messages):
+    docs = []
+    for role, content in new_messages.items():
+        if content:
+            doc = Document(
+                page_content=content,
+                metadata={"session_id": session_id, "role": role}
+            )
+            docs.append(doc)
+
+    if docs:
+        embeddings = OpenAIEmbeddings(api_key=api_key)
+        vectorstore = FAISS.load_local(
+            retrieval_index_path,
+            embeddings,
+            allow_dangerous_deserialization=True,
+        )
+        vectorstore.add_documents(docs)
+        vectorstore.save_local(retrieval_index_path)
\ No newline at end of file
diff --git a/services/chatbot/src/mcpserver/server.py b/services/chatbot/src/mcpserver/server.py
index c476d6db..78760626 100644
--- a/services/chatbot/src/mcpserver/server.py
+++ b/services/chatbot/src/mcpserver/server.py
@@ -4,6 +4,10 @@
 import os
 import logging
 import time
+from .tool_helpers import (
+    get_any_api_key,
+    get_chat_history_retriever,
+)
 # Configure logging
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
@@ -77,6 +81,27 @@ def get_http_client():
     name="My crAPI MCP Server"
 )
 
+@mcp.tool(tags={"history", "search", "summary", "context"},)
+async def search_chat_history(question: str) -> str:
+    """Answer questions based on user chat history (summarized and semantically indexed). 
+    Use this when the user asks about prior chats, what they asked earlier, or wants a summary of past conversations.    
+    Answer questions based on the user's prior chat history.
+
+    Use this tool when the user refers to anything mentioned before, asks for a summary of previous messages or sessions, 
+    or references phrases like 'what I said earlier', 'things we discussed', 'my earlier question', 'until now', 'till date', 'all my conversations' or 'previously mentioned'.
+    The chat history is semantically indexed and summarized using vector search."""
+
+    logger.info(f"search_chat_history called with: {question}")
+    api_key=await get_any_api_key()
+    if not api_key:
+        logger.error("API key is not available. Cannot search chat history.")
+        return "OpenAI API key is not available. Cannot search chat history."
+    retriever = await get_chat_history_retriever(api_key=api_key)
+    response = await retriever.ainvoke({"query": question})
+    result = response["result"]
+    logger.info(f"RESULT: {result}")
+    return result
+
 if __name__ == "__main__":
     mcp_server_port = int(os.environ.get("MCP_SERVER_PORT", 5500))
     mcp.run(transport="streamable-http", host="0.0.0.0", port=mcp_server_port,)
diff --git a/services/chatbot/src/mcpserver/tool_helpers.py b/services/chatbot/src/mcpserver/tool_helpers.py
new file mode 100644
index 00000000..3556892a
--- /dev/null
+++ b/services/chatbot/src/mcpserver/tool_helpers.py
@@ -0,0 +1,48 @@
+import os
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.prompts import PromptTemplate
+from chatbot.extensions import db
+from langchain.chains import RetrievalQA
+from langchain_openai import ChatOpenAI
+
+retrieval_index_path = "/app/resources/chat_index"
+
+async def get_any_api_key():
+    if os.environ.get("CHATBOT_OPENAI_API_KEY"):
+        return os.environ.get("CHATBOT_OPENAI_API_KEY")
+    doc = await db.sessions.find_one(
+        {"openai_api_key": {"$exists": True, "$ne": None}},
+        {"openai_api_key": 1}
+    )
+    if doc and "openai_api_key" in doc:
+        return doc["openai_api_key"]    
+    return None
+
+async def get_chat_history_retriever(api_key: str):
+    prompt_template = PromptTemplate.from_template(
+        """You are an assistant that summarizes chat history across sessions.
+
+        Given the following chat excerpts:
+        {context}
+        Answer the user's question: {question}
+
+        If the user asks for a summary, provide a coherent, high-level summary of the conversations in natural language.
+        If the user asks a specific question, extract and answer it from the chats.
+        Be detailed, accurate, and neutral."""
+    )
+    embeddings = OpenAIEmbeddings(api_key=api_key)
+    vectorstore = FAISS.load_local(
+        retrieval_index_path,
+        embeddings,
+        allow_dangerous_deserialization=True
+    )
+    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=ChatOpenAI(api_key=api_key, model="gpt-4o"),
+        retriever=retriever,
+        chain_type="stuff",
+        chain_type_kwargs={"prompt": prompt_template, "document_variable_name": "context"},
+        return_source_documents=False,
+    )
+    return qa_chain

From a9257ec84cc354a008a03bd15cbcb1ebcc0ad31f Mon Sep 17 00:00:00 2001
From: Keyur Doshi <keyur.doshi@harness.io>
Date: Fri, 25 Jul 2025 06:14:43 +0530
Subject: [PATCH 2/2] Persisted storage of vectors using Chroma

---
 deploy/docker/docker-compose.yml              |  4 +++
 deploy/helm/templates/chatbot/config.yaml     |  1 +
 deploy/helm/templates/chatbot/deployment.yaml |  7 ++++
 deploy/helm/templates/chatbot/storage.yaml    | 33 +++++++++++++++++
 deploy/helm/values-pv.yaml                    | 19 ++++++++++
 deploy/helm/values.yaml                       | 18 ++++++++++
 services/chatbot/Dockerfile                   |  1 +
 services/chatbot/requirements.txt             |  3 +-
 services/chatbot/src/chatbot/chat_service.py  | 11 +++---
 services/chatbot/src/chatbot/config.py        |  1 +
 services/chatbot/src/chatbot/vector_index.py  | 36 ++++---------------
 .../chatbot/src/mcpserver/tool_helpers.py     | 12 +++----
 12 files changed, 104 insertions(+), 42 deletions(-)
 create mode 100644 deploy/helm/templates/chatbot/storage.yaml

diff --git a/deploy/docker/docker-compose.yml b/deploy/docker/docker-compose.yml
index 820119cd..50e2d85a 100755
--- a/deploy/docker/docker-compose.yml
+++ b/deploy/docker/docker-compose.yml
@@ -167,7 +167,10 @@ services:
       - MONGO_DB_PASSWORD=crapisecretpassword
       - MONGO_DB_NAME=crapi
       - DEFAULT_MODEL=gpt-4o-mini
+      - CHROMA_PERSIST_DIRECTORY=/app/vectorstore
       # - CHATBOT_OPENAI_API_KEY=
+    volumes:
+      - chatbot-vectors:/app/vectorstore
     depends_on:
       mongodb:
         condition: service_healthy
@@ -295,3 +298,4 @@ services:
 volumes:
   mongodb-data:
   postgresql-data:
+  chatbot-vectors:
diff --git a/deploy/helm/templates/chatbot/config.yaml b/deploy/helm/templates/chatbot/config.yaml
index 9afbb753..215d9af8 100644
--- a/deploy/helm/templates/chatbot/config.yaml
+++ b/deploy/helm/templates/chatbot/config.yaml
@@ -22,3 +22,4 @@ data:
     MONGO_DB_NAME: {{ .Values.mongodb.config.mongoDbName }}
     CHATBOT_OPENAI_API_KEY: {{ .Values.openAIApiKey }}
     DEFAULT_MODEL: {{ .Values.chatbot.config.defaultModel | quote }}
+    CHROMA_PERSIST_DIRECTORY: {{ .Values.chatbot.config.chromaPersistDirectory | quote }}
diff --git a/deploy/helm/templates/chatbot/deployment.yaml b/deploy/helm/templates/chatbot/deployment.yaml
index 692dfa0c..f58c047f 100644
--- a/deploy/helm/templates/chatbot/deployment.yaml
+++ b/deploy/helm/templates/chatbot/deployment.yaml
@@ -57,3 +57,10 @@ spec:
               port: {{ .Values.chatbot.port }}
             initialDelaySeconds: 15
             periodSeconds: 10
+          volumeMounts:
+            - name: chatbot-vectors
+              mountPath: {{ .Values.chatbot.config.chromaPersistDirectory | quote }}
+      volumes:
+        - name: chatbot-vectors
+          persistentVolumeClaim:
+            claimName: {{ .Values.chatbot.storage.pvc.name }}
diff --git a/deploy/helm/templates/chatbot/storage.yaml b/deploy/helm/templates/chatbot/storage.yaml
new file mode 100644
index 00000000..bace224f
--- /dev/null
+++ b/deploy/helm/templates/chatbot/storage.yaml
@@ -0,0 +1,33 @@
+{{- if eq .Values.chatbot.storage.type "manual" }}
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: {{ .Values.chatbot.storage.pv.name }}
+  labels:
+    release: {{ .Release.Name }}
+    {{- toYaml .Values.chatbot.storage.pv.labels | nindent 4 }}
+spec:
+  storageClassName: {{ .Values.chatbot.storage.type }}
+  capacity:
+    storage: {{ .Values.chatbot.storage.pv.resources.storage }}
+  accessModes:
+    - ReadWriteOnce
+  hostPath:
+    path: {{ .Values.chatbot.storage.pv.hostPath }}
+---
+{{- end }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ .Values.chatbot.storage.pvc.name }}
+  labels:
+    release: {{ .Release.Name }}
+    {{- toYaml .Values.chatbot.storage.pvc.labels | nindent 4 }}
+spec:
+  {{- if ne .Values.chatbot.storage.type "default" }}
+  storageClassName: {{ .Values.chatbot.storage.type }}
+  {{- end }}
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    {{- toYaml .Values.chatbot.storage.pvc.resources | nindent 4 }}
diff --git a/deploy/helm/values-pv.yaml b/deploy/helm/values-pv.yaml
index ac602899..691b1fa9 100644
--- a/deploy/helm/values-pv.yaml
+++ b/deploy/helm/values-pv.yaml
@@ -35,3 +35,22 @@ postgresdb:
       resources:
         requests:
           storage: 2Gi
+
+chatbot:
+  storage:
+    type: "manual"
+    pv:
+      name: chatbot-vectors-pv
+      labels:
+        app: crapi-chatbot
+      resources:
+        storage: 1Gi
+      hostPath: /mnt/vectorstore
+    type: "default"
+    pvc:
+      name: chatbot-vectors-pv-claim
+      labels:
+        app: crapi-chatbot
+      resources:
+        requests:
+          storage: 1Gi
\ No newline at end of file
diff --git a/deploy/helm/values.yaml b/deploy/helm/values.yaml
index 9122ff70..030bf65c 100644
--- a/deploy/helm/values.yaml
+++ b/deploy/helm/values.yaml
@@ -152,6 +152,24 @@ chatbot:
     mongoDbDriver: mongodb
     secretKey: crapi
     defaultModel: gpt-4o-mini
+    chromaPersistDirectory: /app/vectorstore
+  storage:
+    # type: "manual"
+    # pv:
+    #   name: chatbot-vectors-pv
+    #   labels:
+    #     app: crapi-chatbot
+    #   resources:
+    #     storage: 1Gi
+    #   hostPath: /mnt/vectorstore
+    type: "default"
+    pvc:
+      name: chatbot-vectors-pv-claim
+      labels:
+        app: crapi-chatbot
+      resources:
+        requests:
+          storage: 1Gi
   deploymentLabels:
     app: crapi-chatbot
   podLabels:
diff --git a/services/chatbot/Dockerfile b/services/chatbot/Dockerfile
index fa28895e..5218361e 100644
--- a/services/chatbot/Dockerfile
+++ b/services/chatbot/Dockerfile
@@ -16,6 +16,7 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY src /app
 COPY certs /app/certs
 COPY retrieval /app/retrieval
+RUN mkdir -p /app/vectorstore
 RUN mkdir -p /app/resources
 COPY src/resources/crapi-openapi-spec.json /app/resources/crapi-openapi-spec.json
 ENV PYTHONPATH="/app"
diff --git a/services/chatbot/requirements.txt b/services/chatbot/requirements.txt
index f3d0248c..d4cbc666 100644
--- a/services/chatbot/requirements.txt
+++ b/services/chatbot/requirements.txt
@@ -18,4 +18,5 @@ langgraph==0.5.1
 faiss-cpu==1.11.0
 psycopg2-binary
 uvicorn==0.35.0
-fastmcp==2.10.2
\ No newline at end of file
+fastmcp==2.10.2
+chromadb==1.0.15
\ No newline at end of file
diff --git a/services/chatbot/src/chatbot/chat_service.py b/services/chatbot/src/chatbot/chat_service.py
index 58a5ec1f..8a0491f6 100644
--- a/services/chatbot/src/chatbot/chat_service.py
+++ b/services/chatbot/src/chatbot/chat_service.py
@@ -1,7 +1,6 @@
 from uuid import uuid4
-import os
 from langgraph.graph.message import Messages
-from .vector_index import build_vector_index_from_chat_history, update_vector_index, retrieval_index_path
+from .vector_index import update_vector_index
 from .extensions import db
 from .langgraph_agent import execute_langgraph_agent
 
@@ -39,8 +38,8 @@ async def process_user_message(session_id, user_message, api_key, model_name, us
     # Limit chat history to last 20 messages
     history = history[-20:]
     await update_chat_history(session_id, history)
-    if not os.path.exists(retrieval_index_path):
-        await build_vector_index_from_chat_history(api_key)
-    else:
-        await update_vector_index(api_key, session_id, {"user": user_message, "assistant": reply.content})
+    # if not os.path.exists(retrieval_index_path):
+    #     await build_vector_index_from_chat_history(api_key)
+    # else:
+    await update_vector_index(api_key, session_id, {"user": user_message, "assistant": reply.content})
     return reply.content, response_message_id
diff --git a/services/chatbot/src/chatbot/config.py b/services/chatbot/src/chatbot/config.py
index 2994d906..eb7d81f7 100644
--- a/services/chatbot/src/chatbot/config.py
+++ b/services/chatbot/src/chatbot/config.py
@@ -11,3 +11,4 @@ class Config:
     SECRET_KEY = os.getenv("SECRET_KEY", "super-secret")
     MONGO_URI = MONGO_CONNECTION_URI
     DEFAULT_MODEL_NAME = os.getenv("DEFAULT_MODEL", "gpt-4o-mini")
+    CHROMA_PERSIST_DIRECTORY = os.getenv("CHROMA_PERSIST_DIRECTORY", "/app/vectorstore")
diff --git a/services/chatbot/src/chatbot/vector_index.py b/services/chatbot/src/chatbot/vector_index.py
index 0eeb4dff..8397ea9a 100644
--- a/services/chatbot/src/chatbot/vector_index.py
+++ b/services/chatbot/src/chatbot/vector_index.py
@@ -1,28 +1,7 @@
 from langchain_community.embeddings import OpenAIEmbeddings
-from langchain_community.vectorstores import FAISS
+from langchain_community.vectorstores import Chroma
 from langchain_core.documents import Document
-from .extensions import db
-
-retrieval_index_path = "/app/resources/chat_index"
-
-async def build_vector_index_from_chat_history(api_key):
-    docs = []
-    async for chat in db.chat_sessions.find({}):
-        session_id = chat.get("session_id", "unknown")
-        messages = chat.get("messages", [])
-        for msg in messages:
-            role = msg.get("role")
-            content = msg.get("content")
-            if content:
-                doc = Document(
-                    page_content=content,
-                    metadata={"session_id": session_id, "role": role}
-                )
-                docs.append(doc)
-
-    embeddings = OpenAIEmbeddings(api_key=api_key)
-    vectorstore = FAISS.from_documents(docs, embeddings)
-    vectorstore.save_local(retrieval_index_path)
+from .config import Config
 
 async def update_vector_index(api_key, session_id, new_messages):
     docs = []
@@ -35,11 +14,10 @@ async def update_vector_index(api_key, session_id, new_messages):
             docs.append(doc)
 
     if docs:
-        embeddings = OpenAIEmbeddings(api_key=api_key)
-        vectorstore = FAISS.load_local(
-            retrieval_index_path,
-            embeddings,
-            allow_dangerous_deserialization=True,
+        embeddings = OpenAIEmbeddings(api_key=api_key, model="text-embedding-3-large")
+        vectorstore = Chroma(
+            embedding_function=embeddings,
+            persist_directory=Config.CHROMA_PERSIST_DIRECTORY
         )
         vectorstore.add_documents(docs)
-        vectorstore.save_local(retrieval_index_path)
\ No newline at end of file
+        vectorstore.persist()
\ No newline at end of file
diff --git a/services/chatbot/src/mcpserver/tool_helpers.py b/services/chatbot/src/mcpserver/tool_helpers.py
index 3556892a..d78066ce 100644
--- a/services/chatbot/src/mcpserver/tool_helpers.py
+++ b/services/chatbot/src/mcpserver/tool_helpers.py
@@ -1,8 +1,9 @@
 import os
 from langchain_community.embeddings import OpenAIEmbeddings
-from langchain_community.vectorstores import FAISS
+from langchain_community.vectorstores import Chroma
 from langchain.prompts import PromptTemplate
 from chatbot.extensions import db
+from chatbot.config import Config
 from langchain.chains import RetrievalQA
 from langchain_openai import ChatOpenAI
 
@@ -31,11 +32,10 @@ async def get_chat_history_retriever(api_key: str):
         If the user asks a specific question, extract and answer it from the chats.
         Be detailed, accurate, and neutral."""
     )
-    embeddings = OpenAIEmbeddings(api_key=api_key)
-    vectorstore = FAISS.load_local(
-        retrieval_index_path,
-        embeddings,
-        allow_dangerous_deserialization=True
+    embeddings = OpenAIEmbeddings(api_key=api_key, model="text-embedding-3-large")
+    vectorstore = Chroma(
+        embedding_function=embeddings,
+        persist_directory=Config.CHROMA_PERSIST_DIRECTORY
     )
     retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
     qa_chain = RetrievalQA.from_chain_type(