In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from nlp_chat_bot.model.minilm import MiniLM
from nlp_chat_bot.rag import RAG
from nlp_chat_bot.vector_store.chroma_vector_store_builder import ChromaVectorStoreBuilder

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load environment variables from a .env file before any client is created.
# NOTE(review): presumably this supplies the API key used by the Gemini
# chat model instantiated further down — confirm against the project's .env.
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# Filesystem locations, relative to the notebook's working directory.
dataset_path = "../data"
vector_store_path = "../chromadb"
model_download_path = "../models"

# Text splitter configuration (sizes are in characters);
# add_start_index=True tracks each chunk's index in the original document.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
    add_start_index=True,
)

# Wire the pipeline together: embedding model, chat model, then the RAG object.
embedding_function = MiniLM(model_download_path=model_download_path)
llm_gemini = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
rag = RAG(
    dataset_path,
    embedding_function,
    vector_store_path,
    splitter,
    llm=llm_gemini,
)
print("LENGTH", rag.get_num_docs())

# Run the retrieval step on its own with an empty starting context.
# NOTE(review): the question contains typos ("the the", and "GRPD" is
# presumably meant to be "GDPR"); it is kept verbatim so the recorded
# outputs still correspond to this query — confirm and fix on the next re-run.
docs_retrieved = rag.retrieve(
    state={"question": "What is the the article 93 of GRPD?", "context": []}
)

retrieved_context = docs_retrieved["context"]
print("Num docs:", len(retrieved_context))

# Print every retrieved document with its score, separated by a banner line.
for i, doc in enumerate(retrieved_context):
    print("\n\n", "#" * 30, "\n")
    print(f"doc {i}: (score: {doc.metadata['score']})")
    print(doc.page_content)

0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:02<00:00,  2.03s/it]
0it [00:00, ?it/s]


Embedding and storing 14 chunks...


100%|██████████| 14/14 [00:00<00:00, 63.28it/s]


LENGTH 14
Num docs: 3


 ############################## 

doc 0: (score: 1.553395305285434)
Privacy policy About Wikipedia Disclaimers Contact Wikipedia Code of Conduct Developers Statistics Cookie statement Mobile view

This page was last edited on 12 December 2024, at 00:08 (UTC).

Text is available under the Creative Commons Attribution-ShareAlike 4.0 License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.

Contents

hide

(Top)

Background

Capabilities

Corporate customization

GPT-4o mini

Scarlett Johansson controversy

See also

References

GPT-4o

17 languages

Article Talk Read Edit View history

Tools

Appearance

hide

Text

Width

Color (beta)

Small

Standard

Large

Standard

Wide

Automatic

Light

Dark

Generative Pre-trained Transformer

4 Omni (GPT-4o)

Developer(s) OpenAI

Initial release May 13, 2024; 7 months ago

Pr

In [4]:
# Run the full RAG call for the same question; the returned value is the
# cell's displayed result.
rag.invoke(
    query={"question": "What is the the article 93 of GRPD?"},
)

{'question': 'What is the the article 93 of GRPD?',
 'context': (Document(metadata={'score': 1.553395305285434}, page_content='Privacy policy About Wikipedia Disclaimers Contact Wikipedia Code of Conduct Developers Statistics Cookie statement Mobile view\n\nThis page was last edited on 12 December 2024, at 00:08 (UTC).\n\nText is available under the Creative Commons Attribution-ShareAlike 4.0 License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.\n\nContents\n\nhide\n\n(Top)\n\nBackground\n\nCapabilities\n\nCorporate customization\n\nGPT-4o mini\n\nScarlett Johansson controversy\n\nSee also\n\nReferences\n\nGPT-4o\n\n17 languages\n\nArticle Talk Read Edit View history\n\nTools\n\nAppearance\n\nhide\n\nText\n\nWidth\n\nColor (beta)\n\nSmall\n\nStandard\n\nLarge\n\nStandard\n\nWide\n\nAutomatic\n\nLight\n\nDark\n\nGenerative Pre-trained Tr