## Setup 

In [1]:
from vector_database.src.text_splitter import chunk_documents, save_chunks_to_disk
from vector_database.src.utils import load_config

from dotenv import load_dotenv
from pathlib import Path

# 1. Load environment
load_dotenv()

# 2. Load config
config_path = Path("config.yaml")
config = load_config(config_path)


## Download documents only if needed

In [None]:
from vector_database.src.documentation_loader import clone_repo, cleanup_old_outputs

# 3. If you want to download the docs from GitHub again, run this:
cleanup_old_outputs()
clone_repo(config)



Cloning from https://github.com/langchain-ai/langgraph.git to /Users/manuelalejandroquesada/PERSONAL/REPOSITORIES/langgraph_documentation_RAG/docs/source_docs...


## Load the Documents

In [None]:
from vector_database.src.documentation_loader import load_documents

# 4. Load documents
docs_path = config['data_source']['github']['target_path']
all_docs = load_documents(docs_path)

## Split documents

In [None]:

# 5. Chunk and save
chunks = chunk_documents(all_docs, config)
save_chunks_to_disk(chunks)

## Populate Qdrant Vector Database

In [None]:
from vector_database.src.vector_store import store_embeddings

# 6. Store embeddings to Qdrant
store_embeddings(chunks, config)

## LangChain vector database  to use in RAG

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

from vector_database.src.vector_store import COLLECTION_NAME,client,embeddings


# client.create_collection(
#     collection_name=COLLECTION_NAME,
#     vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
# )

vector_store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
)

## Initializate the RAG Pipeline

In [None]:
from rag_pipeline.core import RAGPipeline
from prompts.core import PromptBuilder
from langgraph.checkpoint.memory import MemorySaver

builder = PromptBuilder(config_path="prompts/config.yaml")

rag_config = {"configurable": {"thread_id": "test"}}

topic_guard_prompt, _ = builder.build_prompt(
    file_path="../prompts/topic_guard.yml")
rag_system_prompt, _ = builder.build_prompt(
    file_path="../prompts/rag_system_prompt.yml")

checkpointer = MemorySaver()  # only for testing, for production use `PostgresSaver`


rag = RAGPipeline(
    checkpoint=checkpointer,
    vectorstore=vector_store,
    topic_guard_prompt=topic_guard_prompt,
    rag_system_prompt=rag_system_prompt,
    llm_temperature=0.1,
    llm_model_name="gpt-4o-mini",
    num_history_messages=5,
    num_retrieval_chunks=3,
)

## Chat Simulation

In [17]:
import ipywidgets as widgets
from IPython.display import display, Markdown


# Input widget
input_box = widgets.Text(
    value='',
    placeholder='Type your question and press Enter...',
    description='User:',
    layout=widgets.Layout(width='100%')
)

# Output widget
output_box = widgets.Output()

# Handler for Enter key


def on_submit_enter(text):
    question = input_box.value
    answer, sources = rag.chat(question, config=rag_config)
    with output_box:
        display(Markdown(f"**🧑 User:** {question}"))
        if sources:
            sources = " -- ".join(sources)
            display(Markdown(f"**📚 Sources:** {sources}"))
        # This will render the bot's response as Markdown
        display(Markdown(f"**🤖 Bot:**\n {answer}"))
        display(Markdown("---"))
    input_box.value = ''


# Trigger on Enter
input_box.on_submit(on_submit_enter)

# Show interface
display(input_box)
display(output_box)

  input_box.on_submit(on_submit_enter)


Text(value='', description='User:', layout=Layout(width='100%'), placeholder='Type your question and press Ent…

Output()