In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
import os

In [None]:
## Reading from a pdf file
from langchain_community.document_loaders import TextLoader

loader=TextLoader('Ashtangahridaya.txt')
docs=loader.load()
# 3. Split into smaller chunks (important for embeddings & retrieval)
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=500,chunk_overlap=50)
Final_document = text_splitter.split_documents(docs)

In [None]:
###Reading a pdf file

from langchain_community.document_loaders import PyPDFLoader

loader=PyPDFLoader("/content/Charaka-Samhita-Acharya-Charaka.pdf")
docs=loader.load()

# 3. Split into smaller chunks (important for embeddings & retrieval)
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=500,chunk_overlap=50)
Final_document = text_splitter.split_documents(docs)

# apply preprocessing for both the files and create a clean document

In [None]:
import re

def preprocess_text_mupdf(text):
    # Remove headers/footers
    text = re.sub(r'\n\s*\n', '\n', text)  # Remove empty lines
    text = re.sub(r'[^A-Za-z0-9.,;:!?()\'\"\n]+', ' ', text)  # Remove special characters but keep punctuation
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with single space
    return text.strip()

for i in range(len(Final_document)):
  Final_document[i].page_content=preprocess_text_mupdf(Final_document[i].page_content)

# Save the cleaned text to a file
with open('Ashtang_cleaned.txt', 'w') as file:
    for document in Final_document:
        cleaned_text_mupdf = document.page_content
        # Write the cleaned text to the file
        file.write(cleaned_text_mupdf)


In [None]:


# 1. Load multiple books
loader1 = TextLoader("D:\\AIBootcamp\\AyuMitra\\bookbank\\Ashtang_cleaned.txt", encoding="utf-8")
loader2 = TextLoader("D:\\AIBootcamp\\AyuMitra\\bookbank\\Charak_cleaned.txt", encoding="utf-8")

docs1 = loader1.load()
docs2 = loader2.load()

# 2. Combine documents
all_docs = docs1 + docs2

# 3. Split into smaller chunks (important for embeddings & retrieval)
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=500,chunk_overlap=50)
Final_document = text_splitter.split_documents(all_docs)


In [3]:
len(Final_document)  # Check the number of chunks created

4524

In [2]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [None]:
"""
not needed
from langchain.vectorstores import Chroma

db = Chroma.from_documents(Final_document, embedding=embeddings, collection_name="bookstore")
"""


In [None]:
# Create a persistent Chroma DB
persist_directory = r"D:\AIBootcamp\AyuMitra\db"  # use raw string for Windows paths

db = Chroma.from_documents(
    documents=Final_document,
    embedding=embeddings,
    collection_name="bookstore",
    persist_directory=persist_directory
)
db.persist()
print(" Chroma DB saved at", persist_directory)


✅ Chroma DB saved at D:\AIBootcamp\AyuMitra\db


  db.persist()


In [9]:
from langchain_core.runnables import RunnableMap
#To get a callable obebject to map result
retriever=db.as_retriever()
# This gets relevant context documents and prepares inputs for the prompt
retrieval_chain = (
    RunnableMap({
        "question": lambda x: x["question"],
        "context": lambda x: "\n\n".join(
            doc.page_content for doc in retriever.get_relevant_documents(x["question"])
        )
    })
)



In [None]:
import os
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate

# Load environment variables from .env file
load_dotenv()

# Fetch system prompt path from env
system_prompt_path = os.getenv("SYSTEM_PROMPT_PATH")
print(system_prompt_path)

if not system_prompt_path:
    raise ValueError("SYSTEM_PROMPT_PATH not set in .env file")

# Load system prompt from text file
with open(system_prompt_path, "r", encoding="utf-8") as f:
    system_prompt = f.read()

# Build Ayurvedic Medical Bot Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """You are Ayurbot, an expert Ayurvedic assistant.
You act as a doctor and use only the provided context (from Charaka Samhita) 
to generate remedies, medications, and lifestyle advice for the patient.

Response Guidelines:
- Always answer in the following format:
  **User (Patient):** <Repeat the question or symptoms>
  **Chatbot (Ayurvedic Bot):** <Provide remedies/medications>

- Use Ayurvedic principles (Doshas, Agni, Ojas, Dinacharya, Ritucharya) where relevant.
- Present remedies step-by-step (ingredients, preparation, dosage, timing).
- If possible, give more than one option (home remedy, classical formulation, lifestyle advice).
- Mention precautions (pregnancy, chronic illness, interactions with other medicines).
- If the condition is severe or life-threatening, advise seeking immediate medical care.
- Do not generate answers outside the given context.
- End EVERY answer with this disclaimer:
"⚠️ Please confirm this remedy or medication with a certified Ayurvedic practitioner or healthcare expert before following it."
"""),
        (
            "user",
            "Context:\n{context}\n\nQuestion: {question}"
        ),
    ]
)


D:/AIBootcamp/AyuMitra/prompt/system_prompt.txt


In [22]:
# Build Ayurvedic Medical Bot Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """You are Ayurbot, an expert Ayurvedic assistant.
You act as a doctor and use only the provided context
to generate remedies, medications, and lifestyle advice for the patient.

Response Guidelines:
- Always answer in the following format:
  **User (Patient):** <Repeat the question or symptoms>
  **Chatbot (Ayurvedic Bot):** <Provide remedies/medications>

- Use Ayurvedic principles (Doshas, Agni, Ojas, Dinacharya, Ritucharya) where relevant.
- Present remedies step-by-step (ingredients, preparation, dosage, timing).
- If possible, give more than one option (home remedy, classical formulation, lifestyle advice).
- Mention precautions (pregnancy, chronic illness, interactions with other medicines).
- If the condition is severe or life-threatening, advise seeking immediate medical care.
- Do not generate answers outside the given context.
- End EVERY answer with this disclaimer:
"⚠️ Please confirm this remedy or medication with a certified Ayurvedic practitioner or healthcare expert before following it."
"""),
        (
            "user",
            "Context:\n{context}\n\nQuestion: {question}"
        ),
    ]
)


In [16]:
from langchain_google_genai import ChatGoogleGenerativeAI
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=GEMINI_API_KEY,
    temperature=0.5,
    max_output_tokens=4096
)




In [23]:
from langchain_core.output_parsers import StrOutputParser

# For parsing output
output_parser=StrOutputParser()

# Final chain
chain = retrieval_chain | prompt | llm | output_parser



In [24]:
query = " I feel severe pain in urinary bladder,intense pain in penis ,frequent urination in less quantity"


rag_answer=chain.invoke({"question":query})
print(rag_answer)

**User (Patient):** I feel severe pain in urinary bladder,intense pain in penis, frequent urination in less quantity.
**Chatbot (Ayurvedic Bot):**

Based on your symptoms, here are some potential remedies:

*   **Home Remedy:**

    *   **Warm Sitz Bath (Avagaha):** Take a warm tub bath to relieve pain in the bladder and groin region.
*   **Classical Formulations & Ayurvedic Therapies:**

    *   **Vasti Karma (Medicated Enema):** This can help with urinary and pain-related issues. Different types of Vasti can be used based on your specific condition.
    *   **Abhyanga (Body Massage with Oils):** Full body massage can help alleviate body ache.
*   **Lifestyle Advice:**

    *   Avoid suppressing the urge to urinate.
    *   Stay hydrated, but avoid excessive water intake during the urge to urinate.
    *   Avoid coitus when you have the urge to urinate.

⚠️ Please confirm this remedy or medication with a certified Ayurvedic practitioner or healthcare expert before following it.


In [30]:
import numpy as np
from scipy.spatial.distance import cosine
def get_embedding(text):
    """
    Generate an embedding for a given text.

    Args:
    - text (str): The input text.

    Returns:
    - The sentence embedding.
    """
    # Generate the sentence embeddings
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings.embed_query(text)


def calculate_cosine_similarity(embedding1, embedding2):
    """
    Calculate the cosine similarity between two embeddings.

    Args:
    - embedding1 (torch.Tensor): The first embedding.
    - embedding2 (torch.Tensor): The second embedding.

    Returns:
    - The cosine similarity score.
    """
    # Calculate the cosine similarity
    similarity = 1 - cosine(embedding1, embedding2)
    return similarity


def calculate_similarity_scores(true_answer, rag_answer, non_rag_answer):
    """
    Calculate the cosine similarity scores between the true answer and both RAG-based and non-RAG-based answers.

    Args:
    - true_answer (str): The true answer text.
    - rag_answer (str): The RAG-based model's answer text.
    - non_rag_answer (str): The non-RAG-based model's answer text.

    Returns:
    - A dictionary with cosine similarity scores.
    """
    # Convert the answers to embeddings
    true_answer_embedding = get_embedding(true_answer)
    rag_answer_embedding = get_embedding(rag_answer)
    non_rag_answer_embedding = get_embedding(non_rag_answer)

    # Calculate cosine similarity scores
    rag_similarity = calculate_cosine_similarity(true_answer_embedding, rag_answer_embedding)
    non_rag_similarity = calculate_cosine_similarity(true_answer_embedding, non_rag_answer_embedding)


    # Return the scores
    return {
        "RAG Similarity Score": rag_similarity,
        "Non-RAG Similarity Score": non_rag_similarity
    }




In [31]:
true_answer = """**Ayurvedic Remedies**

*   **Classical Formulation:**
    *   **Punarnavadi Kashayam:** This classical Ayurvedic decoction is known for its diuretic and anti-inflammatory properties, which can help alleviate bladder pain and promote healthy urine flow.
        *   **Ingredients:** Punarnava (Boerhavia diffusa) and other herbs.
        *   **Preparation:** Usually available as a ready-made decoction.
        *   **Dosage:** 15-20 ml, mixed with an equal amount of warm water, twice daily after food.
        *   **Timing:** Morning and evening after meals.

**Lifestyle Adjustments**

*   **Hydration:** Drink plenty of fluids (warm water, herbal teas) to help flush out the urinary system and reduce burning sensations.
*   **Avoid Suppressing Urges:** Always heed the natural urge to urinate. Suppressing it can lead to imbalances and pain.
*   **Warm Compress:** Apply a warm compress to the lower abdomen to help soothe the bladder and alleviate pain.

**Precautions:**

*   If you have a known history of kidney stones or any chronic urinary condition, consult with a healthcare provider before trying these remedies.
*   If you experience fever, severe pain, or blood in the urine, seek immediate medical attention.
"""

In [32]:
# Calculate the similarity scores
similarity_scores = calculate_similarity_scores(true_answer, rag_answer, true_answer)
print(similarity_scores)

{'RAG Similarity Score': np.float64(0.7142274190583674), 'Non-RAG Similarity Score': np.float64(1.0)}


In [28]:
from rag_logic import load_db, build_chain,build_conversational_chain,get_session_history
from dotenv import load_dotenv
import os

load_dotenv()
db=load_db(embeddings)  # or "Cases" based on your choice
chain, retriever, config=build_conversational_chain(db)


In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db=load_db(embeddings)  # or "Cases" based on your choice

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [26]:
chain,retriever=build_chain(db)

In [29]:
query = " I feel severe pain in urinary bladder,intense pain in penis ,frequent urination in less quantity"


rag_answer=chain.invoke({"question":query})
print(rag_answer)

ValueError: Missing keys ['session_id'] in config['configurable'] Expected keys are ['session_id'].When using via .invoke() or .stream(), pass in a config; e.g., chain.invoke({'question': 'foo'}, {'configurable': {'session_id': '[your-value-here]'}})

In [1]:
from rag_logic import load_db, build_conversational_chain
from langchain_community.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv

load_dotenv()

# Load embeddings + DB
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = load_db(embeddings)

# Get conversational chain
chain, retriever, config = build_conversational_chain(db)

# Query
query = "I feel severe pain in urinary bladder, intense pain in penis, frequent urination in less quantity"

# ✅ Pass dict, not list
rag_answer = chain.invoke({"question": query}, config=config)
print(rag_answer)



  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


Based on the symptoms and context provided, here are some Ayurvedic remedies that might help:

**Understanding the Ayurvedic Approach:**

*   The symptoms suggest an imbalance of Vata dosha, leading to pain, obstruction, and urinary issues.
*   The treatment aims to balance Vata, promote proper elimination, and soothe the affected areas.

**Ayurvedic Remedies:**

1.  **Home Remedy for Urinary Discomfort:**

    *   **Ingredients:**
        *   Warm water
        *   1/2 teaspoon of Yavakshar (Potassium Carbonate)
    *   **Preparation:** Mix Yavakshar in warm water.
    *   **Dosage:** Drink this once or twice a day.
    *   **Timing:** Preferably on an empty stomach or between meals.
    *   **Benefits:** Yavakshar helps in relieving urinary obstruction and pain.
    *   **Precautions:** Use under supervision, as high doses may cause gastric irritation.

2.  **Classical Ayurvedic Formulation: Chandraprabha Vati**

    *   **Ingredients:** A classical Ayurvedic medicine with a combinat

In [2]:
query = "give me only classical formulations from above answer with no changes"

# ✅ Pass dict, not list
rag_answer = chain.invoke({"question": query}, config=config)
print(rag_answer)

*   **Classical Ayurvedic Formulation:**
    *   **Medication:** Chandraprabha Vati
    *   **Dosage:** 1-2 tablets twice daily
    *   **Timing:** After food with warm water
    *   **Benefits:** It helps in relieving urinary problems.

Please confirm this remedy or medication with a certified Ayurvedic practitioner or healthcare expert before following it.


In [2]:
from rag_logic import save_feedback
query = "give me only classical formulations from above answer with no changes"

rag_answer="testing"
save_feedback(
    query=query,
    bot_output=rag_answer,
    doctor_feedback="correct"
)

In [8]:
# Query
query = "I have legs stiffness+thighs stiffness+wrist stiffness"

# ✅ Pass dict, not list
rag_answer = chain.invoke({"question": query}, config=config)

In [11]:
import pandas as pd
from rag_logic import load_db, build_conversational_chain
from langchain_huggingface import HuggingFaceEmbeddings

# ================================
# Load Embeddings + DB + Chain
# ================================
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = load_db(embeddings)
conv_chain, retriever, config = build_conversational_chain(db, session_id="excel_run")

# ================================
# File paths
# ================================
input_file = "Filtered.xlsx"
output_file = "Disease_symptoms_with_answers.xlsx"

# Load all sheets
sheets_dict = pd.read_excel(input_file)

output_sheets = {}

In [26]:
results = []
for query in sheets_dict['symptoms']:  # assuming column is "symptom"
    rag_answer = conv_chain.invoke({"question": query}, config=config)
    results.append({"query": query, "RAG_Answer": rag_answer})

# Convert to DataFrame for saving
output_df = pd.DataFrame(results)

In [27]:
output_df.head()

Unnamed: 0,query,RAG_Answer
0,fever+cough+hiccups+asthama+change in taste of...,"Based on the symptoms provided, here is an Ayu..."
1,tachycardia+feeling of dryness in heart++stiff...,"Based on the symptoms provided, here is an Ayu..."
2,heart burn+syncope+to be afraid+fever+heat up+...,"Based on the symptoms provided, here is an Ayu..."
3,heaviness in heart+stiffness in heart+dribblin...,"Based on the symptoms provided, here is an Ayu..."
4,intense heart ache+pricking pain in heart+itch...,"Based on the symptoms provided, here is an Ayu..."


In [28]:
output_df.to_excel(output_file, index=False)


In [30]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Use the same model name as in conv_chain config
from dotenv import load_dotenv
load_dotenv()
import os
os.environ["GOOGLE_API_KEY"] = os.getenv("GEMINI_API_KEY")
gemini_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")


In [31]:
import pandas as pd
import time
from rag_logic import load_db, build_conversational_chain
from sklearn.metrics.pairwise import cosine_similarity
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings

# ================================
# Load Gemini LLM (baseline)
# ================================
gemini_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

# ================================
# Embeddings for similarity
# ================================
embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# ================================
# Collect results
# ================================
baseline_results = []

for query in sheets_dict['symptoms']:
    # Direct Gemini answer
    gemini_answer = gemini_llm.invoke(query)
    gemini_text = gemini_answer.content if hasattr(gemini_answer, "content") else str(gemini_answer)

    # RAG answer
    rag_answer = conv_chain.invoke({"question": query}, config=config)
    rag_text = rag_answer if isinstance(rag_answer, str) else str(rag_answer)

    # Compute embeddings
    gemini_vec = embeddings_model.embed_query(gemini_text)
    rag_vec = embeddings_model.embed_query(rag_text)

    # Cosine similarity
    similarity = cosine_similarity([gemini_vec], [rag_vec])[0][0]

    # Save results
    baseline_results.append({
        "query": query,
        "Gemini_Answer": gemini_text,
        "RAG_Answer": rag_text,
        "Similarity_Score": round(similarity, 4)
    })

    time.sleep(5)  # avoid hitting Gemini quota

# ================================
# Save to Excel
# ================================
baseline_df = pd.DataFrame(baseline_results)
baseline_df.to_excel("RAG_vs_Gemini_with_Similarity.xlsx", index=False)

print("✅ Saved RAG vs Gemini results with similarity to RAG_vs_Gemini_with_Similarity.xlsx")


✅ Saved RAG vs Gemini results with similarity to RAG_vs_Gemini_with_Similarity.xlsx
