In [2]:
print("CUROBOT")

CUROBOT


In [None]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv
import os
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
import google.generativeai as genai
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from datetime import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def load_pdf(data):
    """Load every PDF found directly inside a directory.

    Args:
        data: Path of the directory to scan; only entries matching ``*.pdf``
            are picked up, and each one is read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [4]:
# Load the source PDFs. The folder can be overridden with the CUROBOT_DATA_DIR
# environment variable so the notebook is not tied to one machine; when the
# variable is unset the original local path is used.
# NOTE: this cell runs before load_dotenv(), so the override has to come from
# the process environment rather than from the .env file.
data = load_pdf(os.getenv("CUROBOT_DATA_DIR", r"C:\Users\sathv\OneDrive\Desktop\ME\Curobot\data"))

In [5]:
def text_split(data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    Args:
        data: Documents to split (passed straight to ``split_documents``).
        chunk_size: Maximum chunk size handed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value the
            notebook originally hard-coded.
        chunk_overlap: Overlap between neighbouring chunks. Defaults to 20,
            the value the notebook originally hard-coded.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(data)

In [6]:
# Break the loaded PDF documents into chunks for embedding.
chunk = text_split(data)

In [7]:
def download_embeddings():
    """Build the sentence-embedding model used for indexing and retrieval.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)

In [8]:
# Instantiate the embedding model once; it is reused by the vector store below.
embeddings = download_embeddings()

  embeddings=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [9]:
# Load environment variables via python-dotenv (presumably from a local .env
# file holding the API keys read in the next cell — confirm).
load_dotenv()

True

In [10]:
# Read the API keys from the environment and fail early with a readable message
# if one is missing. Without this check, assigning None into os.environ below
# raises an opaque TypeError.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
_missing_keys = [
    name
    for name, value in (
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("GOOGLE_API_KEY", GOOGLE_API_KEY),
    )
    if not value
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Define them in the environment or in the .env file."
    )
# Re-export the keys into the process environment for code that reads them from there.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [11]:
# Create the Pinecone client from the key read out of the environment.
pinecone_api_key = PINECONE_API_KEY
pc = Pinecone(api_key=pinecone_api_key)

In [12]:
# Make sure the "curobot" index exists, creating it on first use, then open it.
index_name = "curobot"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # NOTE(review): 384 presumably matches the vector size produced by the
        # all-MiniLM-L6-v2 embedding model used in this notebook — confirm.
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
index = pc.Index(index_name)

In [13]:
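# One-time ingestion step: uncomment and run this line to embed `chunk` and store it in the
# Pinecone index. Keep it commented out afterwards so that re-running the notebook does not
# upload the same documents again; the next cell connects to the already-populated index.
# docsearch=PineconeVectorStore.from_documents(documents=chunk,embedding=embeddings,index_name=index_name)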

In [14]:
# Attach to the existing Pinecone index, using the same embedding model for queries.
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [15]:
# Similarity-search retriever configured with k=3.
retriever = docsearch.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

In [16]:
# Configure the google.generativeai SDK and build the LangChain chat model with the same key.
genai.configure(api_key=GOOGLE_API_KEY)
model = ChatGoogleGenerativeAI(
    google_api_key=GOOGLE_API_KEY,
    model="gemini-2.5-pro",
)
# Alternative that uses the SDK directly instead of the LangChain wrapper:
# model=genai.GenerativeModel('models/gemini-2.5-pro')

In [17]:
# In-memory conversation log; log_chat() appends one entry per exchange.
chat_history = list()

In [18]:
def log_chat(query, answer):
    """Record one question/answer exchange in the module-level ``chat_history``.

    Args:
        query: The text the user asked.
        answer: The text the bot replied with.

    Each entry is a dict with the keys ``timestamp`` (ISO-8601 string of the
    current local time), ``user`` and ``bot``.
    """
    entry = dict(
        timestamp=datetime.now().isoformat(),
        user=query,
        bot=answer,
    )
    chat_history.append(entry)

In [19]:
# Instruction text for the retrieval-augmented chain. Adjacent string literals
# are concatenated verbatim, so every fragment ends with its own punctuation and
# a trailing space; otherwise sentences run together ("information.Below ...").
system_prompt = (
    "You are an intelligent, trustworthy medical assistant designed to help users with accurate and safe information. "
    "Below is a context extracted from a verified medical handbook. Use this context to answer the user's question **precisely and thoroughly**. "
    "If the context does not contain enough information to fully answer the question, you may also rely on your general medical knowledge - but always indicate when you are doing so. "
    "Always maintain a clear, concise, and professional tone. If the question is outside your scope or potentially harmful without professional diagnosis, advise the user to consult a licensed healthcare provider."
)
# Instruction text used when answering directly with the model (no retrieval chain).
fallback_prompt = (
    "You are an intelligent, trustworthy, reliable medical assistant designed to help users with accurate and safe information. "
    "If possible, answer it using your general medical knowledge. "
    "Be accurate, professional, and safe in your response. Always maintain a clear, concise, and professional tone. "
    "If the question is outside your scope or potentially harmful without professional diagnosis, advise the user to consult a licensed healthcare provider."
)
# Chat template: the system instructions, followed by a user message carrying
# the {context} and {input} placeholders.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "Context : {context}\n\n Question : {input}"),
    ]
)

In [20]:
# Combine the chat model and prompt into a document-answering chain, then wrap
# it with the retriever to form the full retrieval chain.
question_answer_chain = create_stuff_documents_chain(model, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [21]:
def format_history_for_context(history):
    """Render chat history as plain text for inclusion in a prompt.

    Args:
        history: Iterable of dicts that each provide ``'user'`` and ``'bot'`` keys.

    Returns:
        One ``User : ...`` / ``Bot : ...`` pair per entry, joined by newlines;
        an empty string when ``history`` is empty.
    """
    rendered_turns = []
    for turn in history:
        rendered_turns.append(f"User : {turn['user']}\nBot : {turn['bot']}")
    return "\n".join(rendered_turns)

In [25]:
def gemini_response(query, context=None):
    """Answer a query directly with the chat model, without the retrieval chain.

    Args:
        query: The user's question.
        context: Optional extra text to show the model. When it is ``None`` or
            empty the "Additional Context" section is left out entirely, so
            the model never sees the literal text "None".

    Returns:
        The ``content`` attribute of the model's reply.
    """
    prompt_parts = [fallback_prompt, f"User query : {query}"]
    if context:
        prompt_parts.append(f"Additional Context : {context}")
    response = model.invoke("\n\n".join(prompt_parts))
    return response.content

In [26]:
def rag_or_gemini(query, threshold=0.5):
    """Answer a query with the retrieval chain, or fall back to the plain model.

    On the first turn (empty ``chat_history``) with at least one retrieved
    document, the query goes through ``rag_chain``. In every other case, and
    when the chain raises, the answer comes from ``gemini_response``. The
    fallback receives the formatted chat history (if any) followed by the
    retrieved passages (if any), so the knowledge base is still used on
    follow-up turns.

    Args:
        query: The user's question.
        threshold: Currently unused; kept so existing callers keep working.

    Returns:
        The answer text.
    """
    # Retrievers are Runnables; invoke() replaces the deprecated get_relevant_documents().
    retrieved_docs = retriever.invoke(query)

    if not chat_history and retrieved_docs:
        try:
            return rag_chain.invoke({"input": query})["answer"]
        except Exception as e:
            # Best-effort: report why the chain failed, then use the fallback below.
            print(f"RAG failed ({e!r}). Falling back to Gemini")

    context_parts = []
    if chat_history:
        context_parts.append(format_history_for_context(chat_history))
    if retrieved_docs:
        context_parts.append("\n\n".join(doc.page_content for doc in retrieved_docs))

    # An empty string (not None) is passed when there is no context at all.
    return gemini_response(query, context="\n\n".join(context_parts))

In [None]:
# Interactive chat loop. Type "exit" or "quit" (or interrupt the kernel / close
# stdin) to stop; blank input is ignored instead of being sent to the model.
while True:
    try:
        query = input("Input query : ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave the loop cleanly.
        break
    if not query:
        continue
    if query.lower() in {"exit", "quit"}:
        break
    response = rag_or_gemini(query)
    log_chat(query, response)
    print(response)

Of course. Here is a clear and safe explanation of allergies based on general medical knowledge.

An allergy is a condition in which the immune system overreacts to a substance that is normally harmless to most people. These substances are known as **allergens**.

When a person with an allergy is exposed to an allergen, their immune system mistakenly identifies it as a dangerous invader. In response, it produces antibodies called Immunoglobulin E (IgE). These antibodies trigger the release of chemicals, most notably **histamine**, which cause the symptoms of an allergic reaction.

**Common Allergens**

Allergens can be grouped into several categories:
*   **Airborne Allergens:** Pollen, pet dander, dust mites, and mold.
*   **Food Allergens:** Common examples include peanuts, tree nuts, milk, eggs, soy, and shellfish.
*   **Insect Stings:** Venom from bees, wasps, and hornets.
*   **Medications:** Penicillin and other drugs.
*   **Contact Allergens:** Substances that cause a reaction o

In [1]:
# Empty the conversation log in place. This must run after the cell that defines
# `chat_history`; the recorded run has execution count In [1] and raised
# NameError because the name had not been defined yet.
chat_history.clear()

NameError: name 'chat_history' is not defined