From f017e545485302fdd06d865844a4674a7d51c3fe Mon Sep 17 00:00:00 2001 From: Keyur Doshi Date: Wed, 23 Jul 2025 13:50:59 +0530 Subject: [PATCH 1/2] Implemented vector index for chat history context and MCP tool for semantic search & summarization --- services/chatbot/src/chatbot/chat_service.py | 10 ++-- services/chatbot/src/chatbot/vector_index.py | 45 +++++++++++++++++ services/chatbot/src/mcpserver/server.py | 25 ++++++++++ .../chatbot/src/mcpserver/tool_helpers.py | 48 +++++++++++++++++++ 4 files changed, 125 insertions(+), 3 deletions(-) create mode 100644 services/chatbot/src/chatbot/vector_index.py create mode 100644 services/chatbot/src/mcpserver/tool_helpers.py diff --git a/services/chatbot/src/chatbot/chat_service.py b/services/chatbot/src/chatbot/chat_service.py index 41846c4f..58a5ec1f 100644 --- a/services/chatbot/src/chatbot/chat_service.py +++ b/services/chatbot/src/chatbot/chat_service.py @@ -1,9 +1,9 @@ from uuid import uuid4 - +import os from langgraph.graph.message import Messages - +from .vector_index import build_vector_index_from_chat_history, update_vector_index, retrieval_index_path from .extensions import db -from .langgraph_agent import build_langgraph_agent, execute_langgraph_agent +from .langgraph_agent import execute_langgraph_agent async def get_chat_history(session_id): @@ -39,4 +39,8 @@ async def process_user_message(session_id, user_message, api_key, model_name, us # Limit chat history to last 20 messages history = history[-20:] await update_chat_history(session_id, history) + if not os.path.exists(retrieval_index_path): + await build_vector_index_from_chat_history(api_key) + else: + await update_vector_index(api_key, session_id, {"user": user_message, "assistant": reply.content}) return reply.content, response_message_id diff --git a/services/chatbot/src/chatbot/vector_index.py b/services/chatbot/src/chatbot/vector_index.py new file mode 100644 index 00000000..0eeb4dff --- /dev/null +++ b/services/chatbot/src/chatbot/vector_index.py @@ -0,0 +1,45 @@ +from langchain_community.embeddings import OpenAIEmbeddings +from langchain_community.vectorstores import FAISS +from langchain_core.documents import Document +from .extensions import db + +retrieval_index_path = "/app/resources/chat_index" + +async def build_vector_index_from_chat_history(api_key): + docs = [] + async for chat in db.chat_sessions.find({}): + session_id = chat.get("session_id", "unknown") + messages = chat.get("messages", []) + for msg in messages: + role = msg.get("role") + content = msg.get("content") + if content: + doc = Document( + page_content=content, + metadata={"session_id": session_id, "role": role} + ) + docs.append(doc) + + embeddings = OpenAIEmbeddings(api_key=api_key) + vectorstore = FAISS.from_documents(docs, embeddings) + vectorstore.save_local(retrieval_index_path) + +async def update_vector_index(api_key, session_id, new_messages): + docs = [] + for role, content in new_messages.items(): + if content: + doc = Document( + page_content=content, + metadata={"session_id": session_id, "role": role} + ) + docs.append(doc) + + if docs: + embeddings = OpenAIEmbeddings(api_key=api_key) + vectorstore = FAISS.load_local( + retrieval_index_path, + embeddings, + allow_dangerous_deserialization=True, + ) + vectorstore.add_documents(docs) + vectorstore.save_local(retrieval_index_path) \ No newline at end of file diff --git a/services/chatbot/src/mcpserver/server.py b/services/chatbot/src/mcpserver/server.py index c476d6db..78760626 100644 --- a/services/chatbot/src/mcpserver/server.py +++ b/services/chatbot/src/mcpserver/server.py @@ -4,6 +4,10 @@ import os import logging import time +from .tool_helpers import ( + get_any_api_key, + get_chat_history_retriever, +) # Configure logging logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" @@ -77,6 +81,27 @@ def get_http_client(): name="My crAPI MCP Server" ) +@mcp.tool(tags={"history", "search", "summary", "context"},) +async def search_chat_history(question: str) -> str: + """Answer questions based on user chat history (summarized and semantically indexed). + Use this when the user asks about prior chats, what they asked earlier, or wants a summary of past conversations. + Answer questions based on the user's prior chat history. + + Use this tool when the user refers to anything mentioned before, asks for a summary of previous messages or sessions, + or references phrases like 'what I said earlier', 'things we discussed', 'my earlier question', 'until now', 'till date', 'all my conversations' or 'previously mentioned'. + The chat history is semantically indexed and summarized using vector search.""" + + logger.info(f"search_chat_history called with: {question}") + api_key=await get_any_api_key() + if not api_key: + logger.error("API key is not available. Cannot search chat history.") + return "OpenAI API key is not available. Cannot search chat history." + retriever = await get_chat_history_retriever(api_key=api_key) + response = await retriever.ainvoke({"query": question}) + result = response["result"] + logger.info(f"RESULT: {result}") + return result + if __name__ == "__main__": mcp_server_port = int(os.environ.get("MCP_SERVER_PORT", 5500)) mcp.run(transport="streamable-http", host="0.0.0.0", port=mcp_server_port,) diff --git a/services/chatbot/src/mcpserver/tool_helpers.py b/services/chatbot/src/mcpserver/tool_helpers.py new file mode 100644 index 00000000..3556892a --- /dev/null +++ b/services/chatbot/src/mcpserver/tool_helpers.py @@ -0,0 +1,48 @@ +import os +from langchain_community.embeddings import OpenAIEmbeddings +from langchain_community.vectorstores import FAISS +from langchain.prompts import PromptTemplate +from chatbot.extensions import db +from langchain.chains import RetrievalQA +from langchain_openai import ChatOpenAI + +retrieval_index_path = "/app/resources/chat_index" + +async def get_any_api_key(): + if os.environ.get("CHATBOT_OPENAI_API_KEY"): + return os.environ.get("CHATBOT_OPENAI_API_KEY") + doc = await db.sessions.find_one( + {"openai_api_key": {"$exists": True, "$ne": None}}, + {"openai_api_key": 1} + ) + if doc and "openai_api_key" in doc: + return doc["openai_api_key"] + return None + +async def get_chat_history_retriever(api_key: str): + prompt_template = PromptTemplate.from_template( + """You are an assistant that summarizes chat history across sessions. + + Given the following chat excerpts: + {context} + Answer the user's question: {question} + + If the user asks for a summary, provide a coherent, high-level summary of the conversations in natural language. + If the user asks a specific question, extract and answer it from the chats. + Be detailed, accurate, and neutral.""" + ) + embeddings = OpenAIEmbeddings(api_key=api_key) + vectorstore = FAISS.load_local( + retrieval_index_path, + embeddings, + allow_dangerous_deserialization=True + ) + retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}) + qa_chain = RetrievalQA.from_chain_type( + llm=ChatOpenAI(api_key=api_key, model="gpt-4o"), + retriever=retriever, + chain_type="stuff", + chain_type_kwargs={"prompt": prompt_template, "document_variable_name": "context"}, + return_source_documents=False, + ) + return qa_chain From a9257ec84cc354a008a03bd15cbcb1ebcc0ad31f Mon Sep 17 00:00:00 2001 From: Keyur Doshi Date: Fri, 25 Jul 2025 06:14:43 +0530 Subject: [PATCH 2/2] Persisted storage of vectors using Chroma --- deploy/docker/docker-compose.yml | 4 +++ deploy/helm/templates/chatbot/config.yaml | 1 + deploy/helm/templates/chatbot/deployment.yaml | 7 ++++ deploy/helm/templates/chatbot/storage.yaml | 33 +++++++++++++++++ deploy/helm/values-pv.yaml | 19 ++++++++++ deploy/helm/values.yaml | 18 ++++++++++ services/chatbot/Dockerfile | 1 + services/chatbot/requirements.txt | 3 +- services/chatbot/src/chatbot/chat_service.py | 11 +++--- services/chatbot/src/chatbot/config.py | 1 + services/chatbot/src/chatbot/vector_index.py | 36 ++++--------------- .../chatbot/src/mcpserver/tool_helpers.py | 12 +++---- 12 files changed, 104 insertions(+), 42 deletions(-) create mode 100644 deploy/helm/templates/chatbot/storage.yaml diff --git a/deploy/docker/docker-compose.yml b/deploy/docker/docker-compose.yml index 820119cd..50e2d85a 100755 --- a/deploy/docker/docker-compose.yml +++ b/deploy/docker/docker-compose.yml @@ -167,7 +167,10 @@ services: - MONGO_DB_PASSWORD=crapisecretpassword - MONGO_DB_NAME=crapi - DEFAULT_MODEL=gpt-4o-mini + - CHROMA_PERSIST_DIRECTORY=/app/vectorstore # - CHATBOT_OPENAI_API_KEY= + volumes: + - chatbot-vectors:/app/vectorstore depends_on: mongodb: condition: service_healthy @@ -295,3 +298,4 @@ services: volumes: mongodb-data: postgresql-data: + chatbot-vectors: diff --git a/deploy/helm/templates/chatbot/config.yaml b/deploy/helm/templates/chatbot/config.yaml index 9afbb753..215d9af8 100644 --- a/deploy/helm/templates/chatbot/config.yaml +++ b/deploy/helm/templates/chatbot/config.yaml @@ -22,3 +22,4 @@ data: MONGO_DB_NAME: {{ .Values.mongodb.config.mongoDbName }} CHATBOT_OPENAI_API_KEY: {{ .Values.openAIApiKey }} DEFAULT_MODEL: {{ .Values.chatbot.config.defaultModel | quote }} + CHROMA_PERSIST_DIRECTORY: {{ .Values.chatbot.config.chromaPersistDirectory | quote }} diff --git a/deploy/helm/templates/chatbot/deployment.yaml b/deploy/helm/templates/chatbot/deployment.yaml index 692dfa0c..f58c047f 100644 --- a/deploy/helm/templates/chatbot/deployment.yaml +++ b/deploy/helm/templates/chatbot/deployment.yaml @@ -57,3 +57,10 @@ spec: port: {{ .Values.chatbot.port }} initialDelaySeconds: 15 periodSeconds: 10 + volumeMounts: + - name: chatbot-vectors + mountPath: {{ .Values.chatbot.config.chromaPersistDirectory | quote }} + volumes: + - name: chatbot-vectors + persistentVolumeClaim: + claimName: {{ .Values.chatbot.storage.pvc.name }} diff --git a/deploy/helm/templates/chatbot/storage.yaml b/deploy/helm/templates/chatbot/storage.yaml new file mode 100644 index 00000000..bace224f --- /dev/null +++ b/deploy/helm/templates/chatbot/storage.yaml @@ -0,0 +1,33 @@ +{{- if eq .Values.chatbot.storage.type "manual" }} +apiVersion: v1 +kind: PersistentVolume +metadata: + name: {{ .Values.chatbot.storage.pv.name }} + labels: + release: {{ .Release.Name }} + {{- toYaml .Values.chatbot.storage.pv.labels | nindent 4 }} +spec: + storageClassName: {{ .Values.chatbot.storage.type }} + capacity: + storage: {{ .Values.chatbot.storage.pv.resources.storage }} + accessModes: + - ReadWriteOnce + hostPath: + path: {{ .Values.chatbot.storage.pv.hostPath }} +--- +{{- end }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Values.chatbot.storage.pvc.name }} + labels: + release: {{ .Release.Name }} + {{- toYaml .Values.chatbot.storage.pvc.labels | nindent 4 }} +spec: + {{- if ne .Values.chatbot.storage.type "default" }} + storageClassName: {{ .Values.chatbot.storage.type }} + {{- end }} + accessModes: + - ReadWriteOnce + resources: + {{- toYaml .Values.chatbot.storage.pvc.resources | nindent 4 }} diff --git a/deploy/helm/values-pv.yaml b/deploy/helm/values-pv.yaml index ac602899..691b1fa9 100644 --- a/deploy/helm/values-pv.yaml +++ b/deploy/helm/values-pv.yaml @@ -35,3 +35,22 @@ postgresdb: resources: requests: storage: 2Gi + +chatbot: + storage: + type: "manual" + pv: + name: chatbot-vectors-pv + labels: + app: crapi-chatbot + resources: + storage: 1Gi + hostPath: /mnt/vectorstore + type: "default" + pvc: + name: chatbot-vectors-pv-claim + labels: + app: crapi-chatbot + resources: + requests: + storage: 1Gi \ No newline at end of file diff --git a/deploy/helm/values.yaml b/deploy/helm/values.yaml index 9122ff70..030bf65c 100644 --- a/deploy/helm/values.yaml +++ b/deploy/helm/values.yaml @@ -152,6 +152,24 @@ chatbot: mongoDbDriver: mongodb secretKey: crapi defaultModel: gpt-4o-mini + chromaPersistDirectory: /app/vectorstore + storage: + # type: "manual" + # pv: + # name: chatbot-vectors-pv + # labels: + # app: crapi-chatbot + # resources: + # storage: 1Gi + # hostPath: /mnt/vectorstore + type: "default" + pvc: + name: chatbot-vectors-pv-claim + labels: + app: crapi-chatbot + resources: + requests: + storage: 1Gi deploymentLabels: app: crapi-chatbot podLabels: diff --git a/services/chatbot/Dockerfile b/services/chatbot/Dockerfile index fa28895e..5218361e 100644 --- a/services/chatbot/Dockerfile +++ b/services/chatbot/Dockerfile @@ -16,6 +16,7 @@ RUN pip install --no-cache-dir -r requirements.txt COPY src /app COPY certs /app/certs COPY retrieval /app/retrieval +RUN mkdir -p /app/vectorstore RUN mkdir -p /app/resources COPY src/resources/crapi-openapi-spec.json /app/resources/crapi-openapi-spec.json ENV PYTHONPATH="/app" diff --git a/services/chatbot/requirements.txt b/services/chatbot/requirements.txt index f3d0248c..d4cbc666 100644 --- a/services/chatbot/requirements.txt +++ b/services/chatbot/requirements.txt @@ -18,4 +18,5 @@ langgraph==0.5.1 faiss-cpu==1.11.0 psycopg2-binary uvicorn==0.35.0 -fastmcp==2.10.2 \ No newline at end of file +fastmcp==2.10.2 +chromadb==1.0.15 \ No newline at end of file diff --git a/services/chatbot/src/chatbot/chat_service.py b/services/chatbot/src/chatbot/chat_service.py index 58a5ec1f..8a0491f6 100644 --- a/services/chatbot/src/chatbot/chat_service.py +++ b/services/chatbot/src/chatbot/chat_service.py @@ -1,7 +1,6 @@ from uuid import uuid4 -import os from langgraph.graph.message import Messages -from .vector_index import build_vector_index_from_chat_history, update_vector_index, retrieval_index_path +from .vector_index import update_vector_index from .extensions import db from .langgraph_agent import execute_langgraph_agent @@ -39,8 +38,8 @@ async def process_user_message(session_id, user_message, api_key, model_name, us # Limit chat history to last 20 messages history = history[-20:] await update_chat_history(session_id, history) - if not os.path.exists(retrieval_index_path): - await build_vector_index_from_chat_history(api_key) - else: - await update_vector_index(api_key, session_id, {"user": user_message, "assistant": reply.content}) + # if not os.path.exists(retrieval_index_path): + # await build_vector_index_from_chat_history(api_key) + # else: + await update_vector_index(api_key, session_id, {"user": user_message, "assistant": reply.content}) return reply.content, response_message_id diff --git a/services/chatbot/src/chatbot/config.py b/services/chatbot/src/chatbot/config.py index 2994d906..eb7d81f7 100644 --- a/services/chatbot/src/chatbot/config.py +++ b/services/chatbot/src/chatbot/config.py @@ -11,3 +11,4 @@ class Config: SECRET_KEY = os.getenv("SECRET_KEY", "super-secret") MONGO_URI = MONGO_CONNECTION_URI DEFAULT_MODEL_NAME = os.getenv("DEFAULT_MODEL", "gpt-4o-mini") + CHROMA_PERSIST_DIRECTORY = os.getenv("CHROMA_PERSIST_DIRECTORY", "/app/vectorstore") diff --git a/services/chatbot/src/chatbot/vector_index.py b/services/chatbot/src/chatbot/vector_index.py index 0eeb4dff..8397ea9a 100644 --- a/services/chatbot/src/chatbot/vector_index.py +++ b/services/chatbot/src/chatbot/vector_index.py @@ -1,28 +1,7 @@ from langchain_community.embeddings import OpenAIEmbeddings -from langchain_community.vectorstores import FAISS +from langchain_community.vectorstores import Chroma from langchain_core.documents import Document -from .extensions import db - -retrieval_index_path = "/app/resources/chat_index" - -async def build_vector_index_from_chat_history(api_key): - docs = [] - async for chat in db.chat_sessions.find({}): - session_id = chat.get("session_id", "unknown") - messages = chat.get("messages", []) - for msg in messages: - role = msg.get("role") - content = msg.get("content") - if content: - doc = Document( - page_content=content, - metadata={"session_id": session_id, "role": role} - ) - docs.append(doc) - - embeddings = OpenAIEmbeddings(api_key=api_key) - vectorstore = FAISS.from_documents(docs, embeddings) - vectorstore.save_local(retrieval_index_path) +from .config import Config async def update_vector_index(api_key, session_id, new_messages): docs = [] @@ -35,11 +14,10 @@ async def update_vector_index(api_key, session_id, new_messages): docs.append(doc) if docs: - embeddings = OpenAIEmbeddings(api_key=api_key) - vectorstore = FAISS.load_local( - retrieval_index_path, - embeddings, - allow_dangerous_deserialization=True, + embeddings = OpenAIEmbeddings(api_key=api_key, model="text-embedding-3-large") + vectorstore = Chroma( + embedding_function=embeddings, + persist_directory=Config.CHROMA_PERSIST_DIRECTORY ) vectorstore.add_documents(docs) - vectorstore.save_local(retrieval_index_path) \ No newline at end of file + vectorstore.persist() \ No newline at end of file diff --git a/services/chatbot/src/mcpserver/tool_helpers.py b/services/chatbot/src/mcpserver/tool_helpers.py index 3556892a..d78066ce 100644 --- a/services/chatbot/src/mcpserver/tool_helpers.py +++ b/services/chatbot/src/mcpserver/tool_helpers.py @@ -1,8 +1,9 @@ import os from langchain_community.embeddings import OpenAIEmbeddings -from langchain_community.vectorstores import FAISS +from langchain_community.vectorstores import Chroma from langchain.prompts import PromptTemplate from chatbot.extensions import db +from chatbot.config import Config from langchain.chains import RetrievalQA from langchain_openai import ChatOpenAI @@ -31,11 +32,10 @@ async def get_chat_history_retriever(api_key: str): If the user asks a specific question, extract and answer it from the chats. Be detailed, accurate, and neutral.""" ) - embeddings = OpenAIEmbeddings(api_key=api_key) - vectorstore = FAISS.load_local( - retrieval_index_path, - embeddings, - allow_dangerous_deserialization=True + embeddings = OpenAIEmbeddings(api_key=api_key, model="text-embedding-3-large") + vectorstore = Chroma( + embedding_function=embeddings, + persist_directory=Config.CHROMA_PERSIST_DIRECTORY ) retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}) qa_chain = RetrievalQA.from_chain_type(