In [7]:
import os 
# Step up one directory (presumably from the notebook's folder to the project root) so that
# relative paths such as "Data/" resolve. A bare relative chdir is not idempotent: every
# re-run of the cell would climb one more level. Once the Data directory is visible from the
# current working directory we are already where we need to be, so stay put.
if not os.path.isdir("Data"):
    os.chdir("../")

In [3]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
# Extract the documents from the PDF files
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; files are
            selected with the ``*.pdf`` glob and parsed with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [9]:
# Load the PDFs from the Data/ directory (relative to the current working directory).
extracted_data = load_pdf_file('Data/')

In [None]:
#extracted_data

In [11]:
# Perform the chunking operation
def text_split(extracted_data, chunk_size=1500, chunk_overlap=150):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split (e.g. the output of ``load_pdf_file``).
        chunk_size: Chunk size passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 1500, the value this notebook has always used.
        chunk_overlap: Overlap between consecutive chunks, passed to the
            splitter. Defaults to 150.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

In [12]:
# Chunk the loaded documents, then report how many chunks were produced.
text_chunks = text_split(extracted_data=extracted_data)
chunk_total = len(text_chunks)
print("length of text_chunks: ", chunk_total)

length of text_chunks:  14867


In [None]:
#text_chunks

In [19]:
from langchain.embeddings import HuggingFaceEmbeddings

In [24]:
# Download the embedding model used to embed the text (384-dimensional vectors)
def download_hugging_face_embeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Create the Hugging Face embedding model used for indexing and retrieval.

    Args:
        model_name: Identifier of the model to load. Defaults to
            ``sentence-transformers/all-MiniLM-L6-v2``, which this notebook
            pairs with a 384-dimensional Pinecone index. When passing a
            different model, make sure the index dimension matches its
            output size.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``model_name``.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [None]:
# Instantiate the embedding model once; the Pinecone cells below reuse this object.
embeddings = download_hugging_face_embeddings()

In [None]:
# Smoke-test the embedding model on a short query.
query_result = embeddings.embed_query("hello world")
print("length",len(query_result))
# The Pinecone index in this notebook is created with dimension=384, so the embedding size
# has to match; catch a mismatch here rather than when talking to the index.
assert len(query_result) == 384, f"expected 384-dimensional embeddings, got {len(query_result)}"
# Preview only the first few components instead of dumping the whole vector into the output.
query_result[:5]

In [51]:
from dotenv import load_dotenv
# Load variables from a .env file into the environment; the API keys are read from
# os.environ in the next cell. The True shown as output is load_dotenv()'s return value.
load_dotenv()

True

In [52]:
# Read both API keys from the environment; each is None when its variable is unset.
# The Gemini key is stored under the GOOGLE_API_KEY variable.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
GEMINI_API_KEY = os.getenv('GOOGLE_API_KEY')

In [None]:
# Create the index in the vector DB for the embeddings.
from pinecone import Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "care-connect"

# An unconditional create_index fails once the index exists, which breaks every re-run of
# the notebook. Only create it when it is not already listed.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the size of the embedding vectors (384 in this notebook)
        metric="cosine",
        spec={
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        }
    )

In [53]:
import os

# Re-export the keys under the environment-variable names above (presumably so the
# LangChain Pinecone / Google integrations can pick them up — confirm against their docs).
# os.environ only accepts strings: a missing key (None) would otherwise surface as an
# opaque "str expected, not NoneType" TypeError, so fail with a message naming the variable.
for _env_name, _env_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("GOOGLE_API_KEY", GEMINI_API_KEY),
):
    if not _env_value:
        raise RuntimeError(
            f"{_env_name} is not set; add it to your .env file or environment before running this cell."
        )
    os.environ[_env_name] = _env_value

In [None]:
# Embed each chunk and store it in the Pinecone index.
# NOTE(review): this sends all of text_chunks on every run; re-running presumably uploads
# the whole corpus again — confirm whether that creates duplicate vectors. Once the index
# is populated, the from_existing_index cell that follows is the one to run.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings
)

In [None]:
# Load the existing index from Pinecone: connects to the index by name with the same
# embedding model, without passing any documents.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)
# Display the vector-store object as the cell output.
docsearch

In [39]:
# Retrieve the 3 most similar chunks per query. The keyword must be spelled `search_kwargs`;
# under a misspelled name the {"k": 3} setting is not applied as the retriever's search options.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [None]:
# Sanity-check retrieval with a sample question and display the returned documents.
retrieved_docs = retriever.invoke("What is Cataract ?")
retrieved_docs

In [54]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Settings for the Gemini chat model, collected in one mapping so they are easy to tune.
llm_settings = {
    "model": "gemini-2.0-flash",
    "temperature": 0.3,
    "max_tokens": 1024,
    "timeout": 120,
    "max_retries": 3,
    "top_p": 0.9,
    "top_k": 40,
}
llm = ChatGoogleGenerativeAI(**llm_settings)

In [61]:
# Build the complete retrieval chain
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate 

# System instructions for the assistant. The {context} placeholder is left for the
# document chain to fill in (presumably with the retrieved chunks).
system_prompt = """You are a knowledgeable medical assistant providing accurate information based on medical documents.
Context: {context}

Please provide a clear, accurate, and well-structured response following these guidelines:
- Focus on medical facts from the provided context
- Use professional yet understandable language
- Include relevant medical terms with brief explanations
- If the information is not in the context, clearly state that
- For conditions/treatments, mention important disclaimers when appropriate

Answer: """

# Two-message chat prompt: the instructions above, then the user's question via {input}.
system_message = ("system", system_prompt)
human_message = ("human", "{input}")
prompt = ChatPromptTemplate.from_messages([system_message, human_message])



In [62]:
# Answering step: combines the LLM with the chat prompt defined earlier.
question_answer_chain = create_stuff_documents_chain(llm,prompt)
# Full pipeline: couples the retriever with the answering step.
rag_chain = create_retrieval_chain(retriever,question_answer_chain)

In [65]:
# Ask a sample question; the question goes in under the "input" key and the generated
# reply is read back from the "answer" entry of the result.
response = rag_chain.invoke({"input": "give me  all the essential symptioms for muscular spasm?"})
print(response["answer"])

Based on the text, here's what is mentioned about muscle spasms:

*   **Muscle Cramp:** A rapid, uncontrolled contraction, or spasm, that happens unexpectedly.
*   **Pain:** Muscle contraction and pain that can last for several minutes, then slowly ease.
*   **Location:** Most common in the calves, feet, and hands, but can affect any muscle.
*   **Associated Symptoms:** Stiffness at rest, slow muscle relaxation, and fasciculation (painless muscle spasm marked by rapid, uncoordinated contraction of many small muscle fibers).
