In [1]:
# Show the kernel's current working directory (the notebook starts inside
# the project's `notebooks/` folder, as the recorded output shows).
%pwd

'/media/kirti/Dev/GenAI/E2EMedicalChatBotWithRAG/notebooks'

In [2]:
# Step up one directory to the project root, presumably so the `src.…`
# package imports in the following cell resolve.
# NOTE: the path is relative, so re-running this cell without restarting the
# kernel climbs one more level each time — run it exactly once per session.
%cd ..

/media/kirti/Dev/GenAI/E2EMedicalChatBotWithRAG


In [3]:
from src.E2EMedicalChatBotWithRAG.logger import logger
from src.E2EMedicalChatBotWithRAG.utils import load_env_variable
from src.E2EMedicalChatBotWithRAG.config.configuration import ConfigurationManager
from src.E2EMedicalChatBotWithRAG.exceptions import AppException
from src.E2EMedicalChatBotWithRAG.models.embedding_model import EmbeddingModel

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain_redis import RedisVectorStore 

In [6]:
# Instantiate the project's EmbeddingModel wrapper and take the model object
# it exposes; the log output shows this loads
# sentence-transformers/all-MiniLM-L6-v2.
# NOTE(review): `_get_model` is underscore-prefixed (private by convention);
# confirm whether the wrapper offers a public accessor to use instead.
embedding_model = EmbeddingModel()._get_model()

[2025-09-20 17:11:40,304|(INFO)| File: SentenceTransformer | Message: Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2]
[2025-09-20 17:11:46,323|(INFO)| File: embedding_model | Message: Successfully loaded embedding model: sentence-transformers/all-MiniLM-L6-v2]


In [7]:
# Open a RedisVectorStore handle on the "medical-chatbot" index of the Redis
# server at localhost:6380, using the embedding model loaded above.
# The log line emitted here reports when the index already exists and is
# left as-is rather than recreated.
vectore_store = RedisVectorStore(
    embeddings=embedding_model,
    redis_url="redis://localhost:6380",
    index_name="medical-chatbot",
)

[2025-09-20 17:14:23,354|(INFO)| File: index | Message: Index already exists, not overwriting.]


In [8]:
# Project-local preprocessing helper, imported here at first use.
from src.E2EMedicalChatBotWithRAG.preprocess.document_preprocesser import DocumentPreprocesser
# Built with its default arguments; it is driven through `.run(...)` below.
document_preprocesser = DocumentPreprocesser()

In [9]:
# Directory of source documents to ingest. The path is relative, so it is
# resolved against the current working directory (the project root after
# the `%cd ..` cell).
path = "./data"

In [10]:
# Run the preprocessing pipeline over `path`. Judging by the log output it
# loads the documents, filters them, and splits them into smaller chunks;
# the result is what gets embedded and indexed below.
clean_document = document_preprocesser.run(path)

[2025-09-20 17:15:32,933|(INFO)| File: document_preprocesser | Message: Loaded 637 documents from ./data]
[2025-09-20 17:15:32,965|(INFO)| File: document_preprocesser | Message: Filtered out 637 documents that don't meet the criteria]
[2025-09-20 17:15:33,263|(INFO)| File: document_preprocesser | Message: Chunked 3426 documents into smaller chunks.]


In [11]:
# Embed the preprocessed chunks and write them into the "medical-chatbot"
# Redis index, keeping the resulting store handle for retrieval.
#
# `from_documents` is a classmethod factory: it builds a store purely from the
# arguments passed here. It is therefore called on the RedisVectorStore class
# itself rather than through the `vectore_store` instance, which would wrongly
# suggest that the instance's own configuration is being reused.
#
# NOTE(review): the log reports the index as already existing and not
# overwritten, so each execution appears to append the chunks again —
# re-running this cell likely duplicates documents; confirm before re-running.
doc_vector_store = RedisVectorStore.from_documents(
    documents=clean_document,
    embedding=embedding_model,
    index_name="medical-chatbot",
    redis_url="redis://localhost:6380",
)

[2025-09-20 17:15:33,375|(INFO)| File: index | Message: Index already exists, not overwriting.]


In [13]:
# Attach to the already-populated "medical-chatbot" index without inserting
# anything, rebinding `doc_vector_store` to that handle.
#
# `from_existing_index` is a classmethod factory, so it is called on the
# RedisVectorStore class directly instead of through the `vectore_store`
# instance; the connection details come only from the arguments given here.
doc_vector_store = RedisVectorStore.from_existing_index(
    index_name="medical-chatbot",
    embedding=embedding_model,
    redis_url="redis://localhost:6380",
)

[2025-09-20 14:43:16,822|(INFO)| File: index | Message: Index already exists, not overwriting.]


In [15]:
# Wrap the vector store as a retriever that runs a similarity search and
# returns the 3 closest chunks (k=3) for each query.
retriever = doc_vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})


In [16]:
# Display the retriever's repr to confirm the backing store and search_kwargs.
retriever

VectorStoreRetriever(tags=['RedisVectorStore', 'HuggingFaceEmbeddings'], vectorstore=<langchain_redis.vectorstores.RedisVectorStore object at 0x762fe7092a80>, search_kwargs={'k': 3})

In [12]:
from langchain.schema import Document

# Hand-written document describing the project itself, used to check that
# freshly added content becomes retrievable.
# The text is assembled from two adjacent literals; the second one starts with
# a run of spaces that is part of the stored text (it came from the indented
# continuation line of the original single literal) and is kept verbatim.
new_doc = Document(
    metadata={"source": "kirti pogra"},
    page_content=(
        "This project is built by Kirti Pogra, she used langchain and groq and RAG functionalitize. And this project is for her portfolio. "
        "     The code is available on GitHub. The project name is medical chatbot using rag "
    ),
)


In [13]:
# Insert the hand-written document into the index; the cell's output is the
# list of keys assigned to the stored entries.
# NOTE(review): the key shown in the output looks freshly generated, so each
# execution presumably stores another copy — confirm before re-running.
doc_vector_store.add_documents([new_doc])

['medical-chatbot::01K5KM55NHXVFQRA8AP3KPSN54']

In [17]:
# Sanity check: a question about the project's author is expected to surface
# the hand-written document among the top-3 results.
retriever.invoke("who built this project?")

[Document(metadata={'source': 'kirti pogra'}, page_content='This project is built by Kirti Pogra, she used langchain and groq and RAG functionalitize. And this project is for her portfolio.      The code is available on GitHub. The project name is medical chatbot using rag '),
 Document(metadata={'source': 'data/Medical_book.pdf'}, page_content='His listening program, the invention of the Electronic\nEar, and his work with the therapeutic use of sound and\nmusic for the past fifty years have made Tomatis arguably\nthe best known and most successful ear specialist in the\nworld. There are more than two hundred Tomatis Centers\nworldwide, treating a vast variety of problems related to\nthe ability to hear.\n(Photograph by V. Brynner. Gamma Liaison. Reproduced by\npermission.)\nGEM - 0001 to 0432 - A  10/22/03 1:43 PM  Page 416'),
 Document(metadata={'source': 'data/Medical_book.pdf'}, page_content='8 (July-August 1997): 26.\nORGANIZATIONS\nProject Inform. 205 13th Street, #2001, San Fran

In [19]:
# Second sanity check, querying by the author's name.
# NOTE(review): the recorded output holds a single document whose text differs
# from `new_doc` as defined in this notebook, so it looks like it was captured
# from an earlier session or a different index state — re-run on a fresh
# kernel to confirm.
retriever.invoke("who is kirti pogra")

[Document(metadata={'source': 'kirti pogra'}, page_content='This project is built by Kirti Pogra using LangChain, Pinecone and Groq. The code is available on GitHub. This project is only for educational purposes.')]