In [3]:
# Importing dependencies
import os
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import UnstructuredWordDocumentLoader 
from langchain_groq import ChatGroq

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load variables from a .env file (python-dotenv) so the os.getenv lookups for
# EMBEDDING_MODEL, GROQ_API_KEY and LLM_MODEL in the configuration cell can see them.
from dotenv import load_dotenv
load_dotenv()

True

In [5]:
# Configuration: data location plus model/API settings read from the environment.
# The path is a raw string so the Windows backslashes are kept literally; in a
# normal string "\D", "\R" and "\d" are invalid escape sequences, which newer
# Python versions warn about and may reject in the future.
Data_path = r"D:\Desktop\RAG_Project\data"
Embedding_model = os.getenv("EMBEDDING_MODEL")  # embedding model name; None if unset
llm_api_key = os.getenv("GROQ_API_KEY")  # Groq API key; None if unset
llm_model = os.getenv("LLM_MODEL")  # chat model name; None if unset

In [6]:
# Sanity check: show the LLM_MODEL value that os.getenv returned (None if it is unset).
print(llm_model)

llama-3.1-8b-instant


In [None]:
# Embedding model: built from the model name held in Embedding_model.
embbedinga_model = HuggingFaceEmbeddings(model_name=Embedding_model)

In [8]:
# Chat model (Groq) that will generate the answers.
llm = ChatGroq(
    groq_api_key=llm_api_key,
    model_name=llm_model,
    # Kept at 0.0 while testing the knowledge base, so sampling is as
    # non-random as the model allows.
    temperature=0.0,
)

In [9]:
# Load every supported file found under Data_path into one flat list of documents.

# File extension -> (loader class, keyword arguments forwarded to that loader).
loader_map = {
    ".pdf": (PyPDFLoader, {}),
    ".txt": (TextLoader, {"encoding": "utf-8"}),
    ".docx": (UnstructuredWordDocumentLoader, {}),
}

# Created before the loop so the list exists even if every loader fails.
documents = []
# Extension -> exception, for file types whose loading raised.
failed_extensions = {}

for ext, (loader_class, kwargs) in loader_map.items():
    # Errors are handled per file type: a failure for one extension (for example
    # a missing optional package) must not discard or skip the other types.
    try:
        temp_loader = DirectoryLoader(
            path=Data_path,
            glob=f"**/*{ext}",  # recursive search for this extension
            loader_cls=loader_class,
            loader_kwargs=kwargs,
        )
        documents.extend(temp_loader.load())
    except Exception as e:
        failed_extensions[ext] = e
        print(f"Error during document loading for '{ext}' files: {e}")

if failed_extensions:
    # Only claim partial success, and name the file types that were not loaded.
    print("Ensure all required packages (pypdf, unstructured, docx2txt) are installed")
    print(
        f"\nLoaded {len(documents)} initial documents (pages/files); "
        f"failed file types: {', '.join(failed_extensions)}"
    )
else:
    print(f"\nSuccessfully loaded a total of {len(documents)} initial documents (pages/files).")


Successfully loaded a total of 31 initial documents (pages/files).


In [10]:
# Chunking: split the loaded documents into overlapping pieces.
CHUNK_SIZE = 1000  # value passed as chunk_size
CHUNK_OVERLAP = 200  # value passed as chunk_overlap

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
chunk = text_splitter.split_documents(documents)

print(f"Total chunks created from all file: {len(chunk)}")

Total chunks created from all file: 240


In [11]:
# Vectorization and store creation
# NOTE(review): embedding_model holds the model *name* and is not read in the
# visible cells; the embeddings object passed to FAISS is embbedinga_model.
# Kept in case a later cell uses it.
embedding_model = Embedding_model

# Stop with an actionable message when there is nothing to index (e.g. document
# loading failed or found no files) instead of failing inside FAISS.
if not chunk:
    raise ValueError(
        "No chunks to index: document loading/splitting produced nothing. "
        "Check Data_path and the loader output before building the vector store."
    )

vectore_store = FAISS.from_documents(chunk, embbedinga_model)
print("FAISS Vector Store created successfully from all documents!")

FAISS Vector Store created successfully from all documents!


In [12]:
# Retrieval smoke test: fetch the top matches for a sample question and show
# which file each one came from plus the start of its text.
test_query = "Which Diet is good for PCOS"
TOP_K = 2  # number of chunks requested from the retriever
retriever = vectore_store.as_retriever(search_kwargs={"k": TOP_K})
retrieved_docs = retriever.invoke(test_query)

print(f"Verification : Retrieved from documnets for {test_query}")

for rank, hit in enumerate(retrieved_docs, start=1):
    origin = hit.metadata.get('source', 'Unknown')
    file_name = os.path.basename(origin)
    snippet = hit.page_content[:400]  # first 400 characters only
    print(f"Document {rank} Source: {file_name}")
    print(f"Content Snippet: {snippet}")

Verification : Retrieved from documnets for Which Diet is good for PCOS
Document 1 Source: PCOS_4.txt
Content Snippet: Diet for PCOS: What to Avoid
“Research shows that people with PCOS show evidence of all-over inflammation, which is associated with heart disease and other illness. The Mediterranean diet eliminates saturated fats, processed meats and refined sugar, which makes it a powerful tool to address inflammation,” Stathos says.

She notes that other well-balanced plans emphasizing non-starchy vegetables an
Document 2 Source: PCOS_4.txt
Content Snippet: Stathos says that insulin resistance affects 50% to 75% of people with PCOS. She explains, “Insulin is like a key that opens cells and lets glucose in. Glucose is fuel for energy. The body is very good at making insulin, but in people with insulin resistance, the insulin does not convey glucose into the cells properly. The result is glucose building up in the bloodstream and the fat cells, which r


In [13]:
# Imports and prompt template for the RAG chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel,RunnablePassthrough
# Prompt: the template text lives in its own constant; {context} and {question}
# are the two placeholders filled in when the prompt is invoked.
RAG_PROMPT_TEMPLATE = """
You are a helpful medical assistant specialized in PCOS.

Use ONLY the information in the context below to answer the user's question.
If the answer is not in the context, say clearly:
"I’m not able to find this information in the provided documents."

<context>
{context}
</context>

Question: {question}

Give a clear, friendly, and concise answer.
"""

prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)


In [None]:
# Chaining it together
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents; each item's ``page_content`` is used.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The input question is sent to the retriever to build {context} and is also
# passed through unchanged as {question}. The retrieved documents are reduced to
# their text first, so the prompt receives readable passages rather than the
# string form of a list of Document objects (metadata included).
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# End-to-end check: run one question through the RAG chain and show the reply.
test_Q = "What is Pcos and what can they eat for maintaining health"
answer = rag_chain.invoke(test_Q)

# Single print call; the empty middle item produces the blank line between
# the question and the answer.
print(f" Question: {test_Q}", "", f"answer: {answer}", sep="\n")

 Question: What is Pcos and what can they eat for maintaining health

answer: PCOS stands for Polycystic Ovary Syndrome, a condition associated with inflammation, heart disease, and other health issues. 

To maintain health, people with PCOS can follow a well-balanced diet that includes:

- Non-starchy vegetables
- Fruits
- Lean protein
- Healthy carbs
- Low-fat dairy

They can also consider the Mediterranean diet, which eliminates saturated fats, processed meats, and refined sugar. Some examples of healthy foods include:

- Whole, unprocessed options
- Fresh fruits and vegetables
- Lean protein sources
- Whole grains
- Low-fat dairy products

Avoiding foods that can cause inflammation, such as fried foods, red meat, processed snacks, and sugary beverages, can also help manage PCOS symptoms.
