In [20]:
from dotenv import load_dotenv

# Load variables from a local .env file; the return value is discarded.
# NOTE(review): this runs before the `api.*` imports below, presumably because
# those modules read environment variables when imported/instantiated — confirm.
_ = load_dotenv()

from api.core.infisical import InfisicalManagedCredentials

# NOTE(review): `secrets_client` is not referenced again in the cells shown here;
# presumably it is instantiated for its side effects (the recorded cell output logs
# Infisical credential initialization) — confirm before removing.
secrets_client = InfisicalManagedCredentials()

from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
)
from typing import Union

from langchain_cohere.chat_models import ChatCohere
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_groq.chat_models import ChatGroq
from api.services.embeddings_factory import EmbeddingsFactory
from api.services.vector_store_factory import VectorStoreFactory
from api.services.llm_factory import LLMFactory
from api.services.memory_factory import MemoryFactory
import api.config.constant as constant
from langchain_core.messages import HumanMessage, AIMessage
from langchain.memory.chat_message_histories.upstash_redis import (
    UpstashRedisChatMessageHistory,
)
from langchain_astradb.chat_message_histories import AstraDBChatMessageHistory

2025-10-21 08:51:41 - api.core.infisical - INFO - Fetched secrets metadata from Infisical (values not logged)
2025-10-21 08:51:41 - api.core.infisical - INFO - Infisical Managed Credentials initialized
2025-10-21 08:51:41 - api.core.infisical - INFO - Infisical Managed Credentials initialized


In [2]:
# Build the embeddings object that is handed to the vector store below.
# NOTE(review): the two positional arguments look like (embeddings service, model id),
# judging by their values and the recorded log output — confirm against EmbeddingsFactory.
embeddings = EmbeddingsFactory().get_embeddings(
    "sentence-transformers", "intfloat/multilingual-e5-large-instruct"
)

# Obtain the "astradb" vector store wired to the embeddings above.
# This module-level `vector_store` is the object the `retrieve` node queries.
vector_store = VectorStoreFactory().get_vectorstore(
    vectorstore_service="astradb",
    embeddings=embeddings,
)

2025-10-21 08:40:50 - api.services.embeddings_factory - INFO - Using Sentence Transformers embeddings model.
  return SentenceTransformerEmbeddings(
2025-10-21 08:40:53 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: intfloat/multilingual-e5-large-instruct
2025-10-21 08:41:01 - api.services.vector_store_factory - INFO - Using AstraDB
2025-10-21 08:41:01 - langchain_astradb.vectorstores - INFO - vector store default init, collection 'godot_docs'
2025-10-21 08:41:02 - root - INFO - Attempting to fetch keyspace from environment variable 'ASTRA_DB_KEYSPACE'
2025-10-21 08:41:02 - root - INFO - Using keyspace 'default_keyspace' from environment variable.
2025-10-21 08:41:02 - root - INFO - Detecting API environment 'prod' from supplied endpoint
2025-10-21 08:41:02 - astrapy.data.database - INFO - createCollection('godot_docs')
2025-10-21 08:41:05 - astrapy.data.database - INFO - finished createCollection('godot_docs')


In [21]:
from langchain_core.documents import Document
from typing_extensions import List, TypedDict


class State(TypedDict):
    """Shared state passed between the `retrieve`, `generate` and `add_message_history` nodes.

    The caller supplies the request fields; the remaining keys are filled in by
    the nodes as the graph runs.
    """
    # User question: used as the retrieval query, as a prompt variable, and
    # stored as the user message in the chat history.
    question: str
    # Documents returned by the retriever; written by `retrieve`, read by `generate`.
    context: List[Document]
    # Flattened text of the prior conversation; written by `retrieve`, read by `generate`.
    chat_history: str
    # Content of the model response; written by `generate`, stored by `add_message_history`.
    answer: str
    # Passed to MemoryFactory to select the conversation whose history is loaded.
    session_id: str
    # Optional metadata filters applied to the vector search (dropped when falsy).
    category: str
    sub_category: str
    # NOTE(review): not read by any node shown in this notebook — confirm whether it is still needed.
    source: str
    # Passed to MemoryFactory to choose the memory backend.
    memory_service: str
    # Passed to LLMFactory.get_chat_model; `retrieve` falls back to 0.0 when temperature is absent.
    model_name: str
    temperature: float
    # Forwarded to the retriever as the "k" search argument.
    top_k: int
    # Chat model instance created in `retrieve` and invoked in `generate`.
    model: Union[ChatCohere, ChatGoogleGenerativeAI, ChatMistralAI, ChatGroq]
    # Chat history object created in `retrieve` and appended to in `add_message_history`.
    memory_instance: Union[UpstashRedisChatMessageHistory, AstraDBChatMessageHistory]

In [4]:
# Chat prompt made of a single human message whose template text comes from
# `constant.SYSTEM_PROMPT`. The template is declared with three input variables
# ("chat_history", "context", "question"), which `generate` supplies on invoke.
# NOTE(review): the template is named SYSTEM_PROMPT but is sent as a human message —
# confirm this is intentional.
prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=["chat_history", "context", "question"],
                input_types={},
                partial_variables={},
                template=constant.SYSTEM_PROMPT,
            ),
            additional_kwargs={},
        )
    ],
)

In [22]:
def _parse_and_flatten_memory(messages: list) -> str:
    """Flatten chat messages into a plain-text transcript.

    Args:
        messages: Message objects from a chat history. Only human and AI
            messages are rendered; any other message type is skipped.

    Returns:
        A string with one "Human: ..." line per human message and one
        "Assistant: ..." line (followed by a blank line) per AI message.
        Empty input yields an empty string.
    """
    parts = []
    for message in messages:
        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of the two message classes.
        if isinstance(message, HumanMessage):
            parts.append(f"Human: {message.content}\n")
        elif isinstance(message, AIMessage):
            parts.append(f"Assistant: {message.content}\n\n")
    return "".join(parts)


def retrieve(state: State):
    """Graph node: fetch context documents and prepare memory and model for later nodes.

    Reads from ``state``: ``question`` (required), and optionally ``category``,
    ``sub_category``, ``top_k``, ``memory_service``, ``session_id``,
    ``model_name`` and ``temperature`` (defaults to 0.0).

    Returns:
        A partial state update with ``context`` (retrieved documents),
        ``chat_history`` (flattened prior conversation), ``model`` (chat model
        instance) and ``memory_instance`` (chat history object).

    Raises:
        KeyError: if ``question`` is missing from ``state``.
    """
    filters = {
        "category": state.get("category"),
        "sub_category": state.get("sub_category"),
    }
    # Drop unset/empty filters so they do not constrain the search.
    clean_filter = {k: v for k, v in filters.items() if v}

    search_kwargs = {"filter": clean_filter}
    top_k = state.get("top_k")
    if top_k is not None:
        # Only forward "k" when the caller provided one; otherwise the
        # retriever would receive k=None instead of using its own default.
        search_kwargs["k"] = top_k

    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs=search_kwargs,
    )
    # `invoke` replaces the deprecated `get_relevant_documents` retriever method.
    retrieved_docs = retriever.invoke(state["question"])
    # NOTE(review): this prints every retrieved document in full; consider
    # logging only a count once debugging is finished.
    print("RETRIEVED DOCS:", retrieved_docs)

    memory_instance = MemoryFactory().get_memory_instance(
        memory_service=state.get("memory_service"),
        session_id=state.get("session_id"),
    )
    chat_history = _parse_and_flatten_memory(memory_instance.messages)

    model = LLMFactory.get_chat_model(
        model_name=state.get("model_name"),
        temperature=state.get("temperature", 0.0),
    )
    return {
        "context": retrieved_docs,
        "chat_history": chat_history,
        "model": model,
        "memory_instance": memory_instance,
    }


def generate(state: State):
    """Graph node: answer the question using the retrieved context and chat history.

    The page contents of the documents in ``state["context"]`` are joined with
    blank lines, rendered into the module-level ``prompt`` together with the
    question and chat history, and sent to the chat model stored under
    ``state["model"]``.

    Returns:
        A partial state update ``{"answer": <response content>}``.
    """
    joined_context = "\n\n".join(
        [document.page_content for document in state["context"]]
    )
    chat_model = state.get("model")
    prompt_inputs = {
        "question": state["question"],
        "context": joined_context,
        "chat_history": state["chat_history"],
    }
    rendered_prompt = prompt.invoke(prompt_inputs)
    llm_reply = chat_model.invoke(rendered_prompt)
    return {"answer": llm_reply.content}

def add_message_history(state: State):
    """Graph node: append the finished question/answer exchange to the chat history.

    Uses the history object stored under ``state["memory_instance"]`` and
    records the question as a user message followed by the answer as an AI
    message.

    Returns:
        The incoming ``state`` object, unchanged.
    """
    history = state.get("memory_instance")
    user_text, ai_text = state.get("question"), state.get("answer")
    history.add_user_message(user_text)
    history.add_ai_message(ai_text)
    return state

In [28]:
from langgraph.graph import START, StateGraph, END

# `add_sequence` registers the three nodes and chains them in list order
# (retrieve -> generate -> add_message_history), so only the entry and exit
# edges need to be declared explicitly; re-adding the inner edges is redundant.
graph_builder = StateGraph(State).add_sequence([retrieve, generate, add_message_history])
graph_builder.add_edge(START, "retrieve")
graph_builder.add_edge("add_message_history", END)
graph = graph_builder.compile()

In [30]:
# Run the graph once end-to-end for a sample question.
# Only request fields are supplied here; `context`, `chat_history`, `answer`,
# `model` and `memory_instance` are produced by the nodes during the run.
result = graph.invoke(
    {
        "question": "How to add 2D sprites in godot 4?",
        "session_id": "8237648732647238",
        "category": "tutorials",
        "sub_category": None,  # falsy, so `retrieve` drops it from the metadata filter
        "memory_service": "astradb",
        "model_name": "command-r-plus-08-2024",
        "temperature": 0.5,
        "top_k": 5,
    }
)

# NOTE(review): the first print emits the full repr of every retrieved Document,
# which makes the recorded cell output very large — consider trimming before sharing.
print(f"Context: {result['context']}")
print(f"Answer: {result['answer']}")

2025-10-21 09:00:08 - astrapy.data.cursors.cursor - INFO - cursor fetching a page: (empty page state) from godot_docs
2025-10-21 09:00:08 - astrapy.data.cursors.cursor - INFO - cursor finished fetching a page: (empty page state) from godot_docs
2025-10-21 09:00:08 - api.services.memory_factory - INFO - Using AstraDB memory service.
2025-10-21 09:00:08 - root - INFO - Attempting to fetch keyspace from environment variable 'ASTRA_DB_KEYSPACE'
2025-10-21 09:00:08 - root - INFO - Using keyspace 'default_keyspace' from environment variable.
2025-10-21 09:00:08 - root - INFO - Detecting API environment 'prod' from supplied endpoint


RETRIEVED DOCS: [Document(id='c868b1d2ae264bb58dff306eb4d03b09', metadata={'source': 'dataset/rtdocs/docs.godotengine.org/en/latest/tutorials/2d/2d_meshes.html', 'category': 'tutorials', 'sub_category': '2d'}, page_content='You can experiment creating them yourself using SurfaceTool from code and displaying them in a MeshInstance2D node.\nCurrently, the only way to generate a 2D mesh within the editor is by either importing an OBJ file as a mesh, or converting it from a Sprite2D.\nOptimizing pixels drawn\uf0c1\nThis workflow is useful for optimizing 2D drawing in some situations. When drawing large images with transparency, Godot will draw the whole quad to the screen. The large transparent areas will still be drawn.\nThis can affect performance, especially on mobile devices, when drawing very large images (generally screen sized),\nor layering multiple images on top of each other with large transparent areas (for example, when using ParallaxBackground).\nConverting to a mesh will ensu

2025-10-21 09:00:09 - astrapy.data.database - INFO - createCollection('chat_history')
2025-10-21 09:00:11 - astrapy.data.database - INFO - finished createCollection('chat_history')
2025-10-21 09:00:11 - astrapy.data.cursors.cursor - INFO - cursor fetching a page: (empty page state) from chat_history
2025-10-21 09:00:12 - astrapy.data.cursors.cursor - INFO - cursor finished fetching a page: (empty page state) from chat_history
2025-10-21 09:00:12 - api.services.llm_factory - INFO - Using Cohere chat model.
2025-10-21 09:00:12 - astrapy.data.cursors.cursor - INFO - cursor fetching a page: (empty page state) from chat_history
2025-10-21 09:00:12 - astrapy.data.cursors.cursor - INFO - cursor finished fetching a page: (empty page state) from chat_history
2025-10-21 09:00:12 - astrapy.data.cursors.cursor - INFO - cursor fetching a page: (empty page state) from chat_history
2025-10-21 09:00:12 - astrapy.data.cursors.cursor - INFO - cursor finished fetching a page: (empty page state) from chat

Context: [Document(id='c868b1d2ae264bb58dff306eb4d03b09', metadata={'source': 'dataset/rtdocs/docs.godotengine.org/en/latest/tutorials/2d/2d_meshes.html', 'category': 'tutorials', 'sub_category': '2d'}, page_content='You can experiment creating them yourself using SurfaceTool from code and displaying them in a MeshInstance2D node.\nCurrently, the only way to generate a 2D mesh within the editor is by either importing an OBJ file as a mesh, or converting it from a Sprite2D.\nOptimizing pixels drawn\uf0c1\nThis workflow is useful for optimizing 2D drawing in some situations. When drawing large images with transparency, Godot will draw the whole quad to the screen. The large transparent areas will still be drawn.\nThis can affect performance, especially on mobile devices, when drawing very large images (generally screen sized),\nor layering multiple images on top of each other with large transparent areas (for example, when using ParallaxBackground).\nConverting to a mesh will ensure that

ERROR:tornado.general:Uncaught exception in ZMQStream callback
Traceback (most recent call last):
  File "/home/user/dev-compass-59/backend/.venv/lib/python3.12/site-packages/zmq/eventloop/zmqstream.py", line 565, in _log_error
    f.result()
  File "/home/user/dev-compass-59/backend/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 663, in shell_channel_thread_main
    _, msg2 = self.session.feed_identities(msg, copy=False)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/dev-compass-59/backend/.venv/lib/python3.12/site-packages/jupyter_client/session.py", line 994, in feed_identities
    raise ValueError(msg)
ValueError: DELIM not in msg_list
ERROR:tornado.general:Uncaught exception in ZMQStream callback
Traceback (most recent call last):
  File "/home/user/dev-compass-59/backend/.venv/lib/python3.12/site-packages/zmq/eventloop/zmqstream.py", line 565, in _log_error
    f.result()
  File "/home/user/dev-compass-59/backend/.venv/lib/python

In [None]:
# NOTE(review): unexecuted scratch cell. The graph is compiled without a
# checkpointer and `config` is None here; `get_state` presumably needs both a
# checkpointer and a thread config to return anything — confirm or remove this cell.
graph.get_state(config=None)

In [None]:
# NOTE(review): unexecuted scratch cell. `session_memory` is not defined in any
# cell shown here, and this call needs `_parse_and_flatten_memory` to exist at
# module scope — as written it is expected to fail on a fresh kernel; confirm or remove.
print(_parse_and_flatten_memory(session_memory))