In [2]:
# Load required Libraries

import os
import re

from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnableLambda
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_groq import ChatGroq
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from rich.console import Console
from rich.markdown import Markdown

In [3]:
# Load PDF Documents from directory

# Every file matching *.pdf in the folder is opened with PyPDFLoader.
# NOTE(review): the folder is an absolute, machine-specific Windows path.
loader = DirectoryLoader(path=r'C:\Desktop\Chatbot\HerbalDocs', glob='*.pdf', loader_cls=PyPDFLoader)

# Read everything into memory and report how many Document objects came back.
docs = loader.load()
print(len(docs))

1499


In [4]:
# Load Embedding Model
# This object is later handed to Chroma as its embedding function.
embedding = HuggingFaceEmbeddings(
    model_name='sentence-transformers/multi-qa-mpnet-base-dot-v1',
)

  embedding = HuggingFaceEmbeddings(model_name='sentence-transformers/multi-qa-mpnet-base-dot-v1')


In [5]:
# Text Splitter

splitter = RecursiveCharacterTextSplitter(
    # Separator candidates, listed from paragraph breaks down to single characters.
    separators=["\n\n", "\n", " ", ""],
    # Size of each chunk (adjustable).
    chunk_size=1000,
    # Overlap between neighbouring chunks to preserve context.
    chunk_overlap=200,
)

# Split every loaded document into chunks.
chunks = splitter.split_documents(docs)

In [6]:
# Display the first chunk (metadata + page_content) as a quick sanity check of the split.
chunks[0]

Document(metadata={'producer': 'Acrobat Distiller 4.0 for Macintosh', 'creator': 'PageMaker 5.0', 'creationdate': '2004-05-25T14:59:01+03:00', 'moddate': '2004-05-25T14:59:05+03:00', 'source': 'C:\\Desktop\\Chatbot\\HerbalDocs\\Volume-1.pdf', 'total_pages': 295, 'page': 0, 'page_label': '1'}, page_content='Bulbus Allii Cepae\ni\nWHO\nmonographs\non selected\nmedicinal plants\nVOLUME 1\nWorld Health Organization\nGeneva\n1999')

In [7]:
# Embedding & vector store (already built and saved to ./chroma_db, so skip this cell and run the next one)

# vector_store = Chroma.from_documents(
#      documents=chunks,
#     embedding=embedding,
#     persist_directory="./chroma_db"   # saves locally
# )

In [8]:
# Use already saved vector store

# Re-open the Chroma collection persisted under ./chroma_db, using the same
# embedding object for queries.
vector_store = Chroma(
    embedding_function=embedding,
    persist_directory="./chroma_db",
)

  vector_store = Chroma(


In [None]:
# Groq-hosted chat model. An API key is required to run this cell.
#
# The key is read from the GROQ_API_KEY environment variable instead of being
# written into the notebook, so it cannot leak through version control or a
# shared .ipynb file.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it in the environment before starting "
        "Jupyter instead of hard-coding the key in the notebook."
    )

model = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0,
    max_tokens=None,
    # NOTE(review): presumably keeps the model's reasoning separate from the
    # answer text -- confirm against the ChatGroq documentation.
    reasoning_format="parsed",
    timeout=None,
    max_retries=2,
    api_key=groq_api_key,
)

In [10]:
# Retriever

from langchain.retrievers import EnsembleRetriever

# Combine two retrievers built on the same vector store -- plain similarity
# search and MMR search -- each asking for k=4 documents, weighted equally.
retriever = EnsembleRetriever(
    weights=[0.5, 0.5],
    retrievers=[
        vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 4},
        ),
        vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": 4},
        ),
    ],
)

In [11]:
# Prompt layout: system instructions, then the stored chat history, then the
# current turn carrying the retrieved context and the user's question.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent literals are concatenated; the line breaks (and the
            # trailing space on the first line) are spelled out explicitly.
            "You are a helpful assistant specialized in medicinal plants. \n"
            "Use only the given transcript and chat history to answer.\n"
            "If the context is incomplete, say so. Do NOT make up information.",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "Transcript:\n{context}\n\nQuestion:\n{question}"),
    ]
)

In [13]:
# Buffer memory that returns message objects under the "chat_history" key.
# NOTE(review): `memory` is not referenced by any other cell visible in this
# notebook; the chat loop keeps history via RunnableWithMessageHistory instead.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

  memory = ConversationBufferMemory(


In [14]:
# Clean function
def clean_output(text: str) -> str:
    """Normalise newlines in model output before it is displayed.

    Two-character "backslash + n" sequences are turned into real newlines,
    then every run of two or more consecutive newlines is collapsed into a
    single newline, so no blank lines remain in the result.

    Args:
        text: Raw text to clean; may be empty or None.

    Returns:
        The cleaned text, or an empty string when ``text`` is falsy.
    """
    if not text:
        return ""
    unescaped = text.replace("\\n", "\n")
    # Collapse runs of any length. A single str.replace of a newline pair only
    # halves each run, so three or more newlines would still leave blank lines.
    return re.sub(r"\n{2,}", "\n", unescaped)
# Wrap clean_output in a RunnableLambda so it can be piped onto the end of the chain.
cleaner = RunnableLambda(lambda x: clean_output(x))

In [15]:
# Parser stage placed between the model and the cleaner in the chain.
parser = StrOutputParser()

In [16]:
# Pipeline: fill the prompt -> call the Groq model -> parse to text -> tidy newlines.
chain = chat_prompt | model | parser | cleaner

In [18]:
store = {}  # session_id -> InMemoryChatMessageHistory; filled lazily by get_history
console = Console()  # rich console used to print the styled conversation

def get_history(session_id: str):
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dict, keyed by session id, so
    repeated calls with the same id return the same object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: start with an empty history.
        fresh_history = InMemoryChatMessageHistory()
        store[session_id] = fresh_history
        return fresh_history

# Wrap the chain so each call loads and saves messages for its session:
# "chat_history" feeds the prompt's placeholder, "question" is recorded as the
# user's message.
with_history = RunnableWithMessageHistory(
    chain,
    get_history,
    history_messages_key="chat_history",
    input_messages_key="question",
)

# ================== Interactive Loop ==================
# Read questions from stdin until the user types "exit" or "quit". For each
# question: retrieve supporting chunks, run the history-aware chain, and print
# the exchange with rich.
print("💬 Medical Chatbot Ready! Type 'exit' to quit.")
session_id = "user1"   # you can change per user

while True:
    # Strip surrounding whitespace so " exit " still quits, and skip blank
    # input instead of sending an empty query to the retriever and the LLM.
    user_query = input("\n🧑 You: ").strip()
    if not user_query:
        continue
    if user_query.lower() in ("exit", "quit"):
        console.print("\n🤖 Bot: 👋 Chat ended.", style="bold green")
        break

    # Retrieve context
    retrieve_text = retriever.invoke(user_query)
    knowledge_base = " ".join(text.page_content for text in retrieve_text)

    # Run with memory
    response = with_history.invoke(
        {"context": knowledge_base, "question": user_query},
        config={"configurable": {"session_id": session_id}}
    )

    # Show conversation in chat-like format.
    # markup=False: the user's text is echoed verbatim; otherwise square
    # brackets in the question would be parsed as rich console markup.
    console.print(f"\n🧑 You: {user_query}", style="bold cyan", markup=False)
    console.print("\n🤖 Bot:", style="bold green")
    console.print(Markdown(str(response)))

💬 Medical Chatbot Ready! Type 'exit' to quit.
