In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from nlp_chat_bot.model.minilm import MiniLM
from nlp_chat_bot.rag import RAG

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import getpass
import os

# Make sure a Google AI API key is available in the process environment before
# any Gemini client is constructed (presumably ChatGoogleGenerativeAI picks it
# up from GOOGLE_API_KEY — confirm against the langchain_google_genai docs).
#
# The check uses a truthiness test rather than a membership test so that a
# variable that is exported but empty (e.g. `GOOGLE_API_KEY=`) is treated the
# same as an unset one and still triggers the prompt. getpass keeps the key
# out of the notebook source and out of the rendered cell output.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")

In [3]:
# --- Locations (relative to the notebook's working directory) ---------------
dataset_path = "../data"            # passed to RAG as the dataset location
model_download_path = "../models"   # passed to MiniLM as its download location

# --- Chunking ----------------------------------------------------------------
# Sizes are expressed in characters; add_start_index asks the splitter to keep
# track of where each chunk starts in its original document.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
    add_start_index=True,
)

# --- Models and pipeline -----------------------------------------------------
minilm = MiniLM(model_download_path=model_download_path)
llm_gemini = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
rag = RAG(
    dataset_path,
    minilm,
    splitter,
    llm=llm_gemini,
)

# Report how many documents/chunks the pipeline holds after construction.
print("LENGTH", rag.get_num_docs())
# Run the retrieval step on its own: the state carries the question plus an
# empty context list, and the returned mapping exposes the hits under "context".
# NOTE(review): "the the" and "GRPD" in the question look like typos (GDPR?);
# the text is left untouched here because it is the actual retrieval input.
docs_retrieved = rag.retrieve(
    state={"question": "What is the the article 93 of GRPD?", "context": []}
)

retrieved_docs = docs_retrieved["context"]
print("Num docs:", len(retrieved_docs))

# Print each hit: a separator banner, its position and metadata score, then
# the chunk text itself.
for i, doc in enumerate(retrieved_docs):
    print("\n\n", "#"*30,"\n")
    print(f"doc {i}: (score: {doc.metadata['score']})")
    print(doc.page_content)





100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:01<00:00,  1.77s/it]
0it [00:00, ?it/s]


Embedding and storing 447 chunks...


100%|██████████| 447/447 [00:02<00:00, 196.69it/s]


LENGTH 447
Num docs: 3


 ############################## 

doc 0: (score: 1.270198941230774)
been expressed by either the European P arliament or the Council within a per iod of three months of notifi cation of that 
act to the European P arliament and the Council or if, bef ore the expir y of that per iod, the European P arliament and the 
Council have both inf or med the Commission that they will not object. That per iod shall be extended by three months at 
the initiative of the European P arliament or of the Council. 
Ar ticle 93 
Committee procedure 
1. The Commission shall be assisted by a committee. That committee shall be a committee within the meaning of 
Regulation (EU) No 182/2011. 
2. Where refe rence is made to this paragraph, Ar ticle 5 of Regulation (EU) No 182/2011 shall apply . 
3. Where reference is made to this paragraph, Ar ticle 8 of Regulation (EU) No 182/2011, in conjunction with 
Ar ticle 5 thereof, shall apply . 
CHAPTE R XI 
Final pr ovision s 
Ar ticle 94 
Re

In [4]:
# Invoke the RAG pipeline with the same question used for the retrieval check.
# Kept as the cell's final expression so the returned value is displayed.
rag.invoke(
    query={"question": "What is the the article 93 of GRPD?"},
)

{'question': 'What is the the article 93 of GRPD?',
 'context': (Document(metadata={'score': 1.270198941230774}, page_content='been expressed by either the European P arliament or the Council within a per iod of three months of notifi cation of that \nact to the European P arliament and the Council or if, bef ore the expir y of that per iod, the European P arliament and the \nCouncil have both inf or med the Commission that they will not object. That per iod shall be extended by three months at \nthe initiative of the European P arliament or of the Council. \nAr ticle 93 \nCommittee procedure \n1. The Commission shall be assisted by a committee. That committee shall be a committee within the meaning of \nRegulation (EU) No 182/2011. \n2. Where refe rence is made to this paragraph, Ar ticle 5 of Regulation (EU) No 182/2011 shall apply . \n3. Where reference is made to this paragraph, Ar ticle 8 of Regulation (EU) No 182/2011, in conjunction with \nAr ticle 5 thereof, shall apply . \nCHA