In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from nlp_chat_bot.model.minilm import MiniLM
from nlp_chat_bot.rag.classic_rag import ClassicRAG
from nlp_chat_bot.vector_store.naive_chunking_chroma_vector_store_builder import NaiveChunkingChromaVectorStoreBuilder

In [None]:
# Load variables from a local .env file into the process environment.
# This runs before the cell that constructs the Gemini client.
# NOTE(review): presumably the .env file supplies the Google API key that
# ChatGoogleGenerativeAI needs — confirm which variable name is expected.
from dotenv import load_dotenv
load_dotenv()

In [None]:
# --- Configuration -----------------------------------------------------------
# All paths are relative to the notebook's working directory.
dataset_path = "../data"
vector_store_path = "../chromadb"
model_download_path = "../models"

# Question used for the retrieval check at the bottom of this cell.
question = "What is my conclusion in my project report on image inpainting?"

# --- Pipeline components -----------------------------------------------------
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=50,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)

embedding_function = MiniLM(model_download_path=model_download_path)
llm_gemini = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
# Not referenced by any cell visible here; kept in case other cells rely on it.
document_loader = None
# Build the vector store from the dataset directory using the embedding
# function and splitter configured above.
vector_store = NaiveChunkingChromaVectorStoreBuilder(
    dataset_path,
    embedding_function,
    vector_store_path,
    splitter,
).build()
rag = ClassicRAG(vector_store, llm_gemini)

# --- Retrieval check ---------------------------------------------------------
# Run only the retrieval step and print what came back, so the retrieved
# chunks can be inspected before asking the LLM for an answer.
print("LENGTH", rag.get_num_docs())
docs_retrieved = rag.retrieve(state={"question": question, "context": []})
retrieved_context = docs_retrieved["context"]

print("Num docs:", len(retrieved_context))

for i, doc in enumerate(retrieved_context):
    print("\n\n", "#" * 30, "\n")
    # NOTE(review): assumes every retrieved document has a 'score' entry in its
    # metadata — confirm against ClassicRAG.retrieve.
    print(f"doc {i}: (score: {doc.metadata['score']})")
    print(doc.page_content)

In [None]:
# Run the RAG pipeline on the question and display the "answer" entry of the
# result as this cell's output.
rag_result = rag.invoke(
    query={"question": "What is my conclusion in my project report on image inpainting?"}
)
rag_result["answer"]