# Load environment variables

In [1]:
from dotenv import load_dotenv
import os

# Pull key/value pairs from a .env file into the process environment so that
# secrets never have to be written into the notebook itself.
load_dotenv()

# Resolves to None when the variable is absent from the environment.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")


# Importing necessary libraries

In [2]:
import google.generativeai as genai
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone,ServerlessSpec

  from .autonotebook import tqdm as notebook_tqdm


## Load pdf data

In [3]:
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; files are
            selected with the glob ``*.pdf`` and parsed with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()


extracted_data = load_pdf_file(data="data/")

## Splitting text into chunks

In [4]:
def text_split(extracted_data, chunk_size=1000, chunk_overlap=100):
    """Split loaded documents into overlapping chunks for embedding.

    Args:
        extracted_data: Documents to split (the output of ``load_pdf_file``).
        chunk_size: Upper bound on the size of each chunk, as measured by the
            splitter. Defaults to 1000, the value previously hard-coded.
        chunk_overlap: Amount of content shared between consecutive chunks.
            Defaults to 100, the value previously hard-coded.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)


text_chunks = text_split(extracted_data)
print(f"Length of text chunks = {len(text_chunks)}")

Length of text chunks = 4471


## Generate embeddings

In [5]:
def download_hugging_face_embeddings():
    """Return the HuggingFace embedding model used for indexing and querying."""
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


embeddings = download_hugging_face_embeddings()

# Smoke test: embed a short string and report the vector length. The vector
# index has to be created with this same dimension.
query_result = embeddings.embed_query("Hello World!")
print("Length", len(query_result))

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Length 384


# Initialize Pinecone

In [None]:
import warnings

# os.environ.get() yields None for a missing variable, so these names are left
# unannotated rather than being declared as plain `str`.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Surface a missing/empty key right here instead of as an opaque client error
# several cells later. A warning (not an exception) is used so that setups
# relying on a client-side fallback still run.
for _key_name, _key_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("GEMINI_API_KEY", GEMINI_API_KEY),
):
    if not _key_value:
        warnings.warn(f"{_key_name} is not set; check your .env file or environment.")

In [7]:
# Create the Pinecone index on first run only. The existence check makes this
# cell safe under "Restart Kernel -> Run All": calling create_index() for an
# index that already exists raises an error.
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "cure-now"

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the length of the embedding vectors
        metric="cosine",  # similarity metric used by the index
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [8]:
# Build a Pinecone client and let the notebook display its repr.
# The instance is not bound to any name, so nothing later in the notebook
# can refer to it.
Pinecone(api_key=PINECONE_API_KEY, environment="us-east-1")

<pinecone.control.pinecone.Pinecone at 0x232b6e31730>

## storing data in pinecone database

In [9]:
from langchain_pinecone import PineconeVectorStore

# Embed every chunk with `embeddings` and store the vectors in the index.
# NOTE(review): no explicit ids are supplied, so re-running this cell
# presumably inserts the same chunks a second time — confirm before re-executing.
docresearch = PineconeVectorStore.from_documents(
    embedding=embeddings,
    index_name=index_name,
    documents=text_chunks,
)

## Load data from the vector DB (Pinecone) index

In [10]:
# Attach to the index that already holds the vectors; this call only opens a
# handle on the existing index by name, using `embeddings` to encode queries.
from langchain_pinecone import PineconeVectorStore

docresearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [11]:
# Similarity search that returns the 3 closest chunks for each query.
retriever = docresearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)


In [12]:
# Sanity-check retrieval with a sample question and print the raw list of
# returned documents (ids, metadata and page content).
retrieved_docs = retriever.invoke("What is Acne?")
print(retrieved_docs)

[Document(id='c52fa619-c221-4ce7-958e-33b1adbe8f5c', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 239.0, 'page_label': '240', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='Isotretinoin (Accutane) is prescribed only for very\nsevere, disfiguring acne.\nAcne is a skin condition that occurs when pores or\nhair follicles become blocked. This allows a waxy\nmaterial, sebum, to collect inside the pores or follicles.\nNormally, sebum flows out onto the skin and hair to\nform a protective coating, but when it cannot get out,\nsmall swellings develop on the skin surface. Bacteria\nand dead skin cells can also collect that can cause\ninflammation. Swellings that are small and not\ninflamed are whiteheads or blackheads. When they\nbecome inflamed, they turn into pimples. Pimples that\nfill with pus are called pustules.\nAcne cannot be cured, but acne dru

## Use Gemini to generate the response to the question

In [15]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model that writes the final answer.
# NOTE(review): the model id is a dated preview identifier — confirm it is
# still served before relying on this cell.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-lite-preview-02-05",
    google_api_key=GEMINI_API_KEY,
    temperature=0,
    max_retries=2,
    max_tokens=None,  # no explicit output-length cap is passed
    timeout=None,  # no explicit request timeout is passed
)

In [17]:
from langchain.prompts import ChatPromptTemplate

In [18]:
# System instructions for the answering model. `{context}` is the placeholder
# filled with the retrieved documents; `{input}` carries the user's question.
# The wording fixes typos and missing spaces in the text sent to the model
# ("piences", "question.If", "answer ,say", "know.Use").
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [22]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [23]:
# First chain: put the retrieved documents into the prompt and call the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Second chain: run the retriever on the input, then hand the documents to the
# question-answering chain above.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [24]:
# Ask the full RAG chain a question; the chain's result is indexed by
# "answer" to print only the generated text.
response = rag_chain.invoke({"input": " what are the chronical diseases in a very simple terms understandble by non doctors "})
print(response["answer"])

I am sorry, but I cannot answer the question. The provided context does not contain information about chronic diseases.


In [25]:
# Widen retrieval to the 5 closest chunks per query.
# NOTE(review): `rag_chain` was constructed from the earlier retriever object;
# it presumably has to be rebuilt for this change to take effect — confirm.
retriever = docresearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

In [26]:
# Alternative system instructions with an explicit fallback sentence.
# NOTE(review): `prompt` was created from the earlier `system_prompt` value;
# reassigning the string here does not by itself change `prompt` — confirm it
# is rebuilt before the next chain invocation.
system_prompt = (
    "You are an AI assistant. Use the retrieved documents to answer the question. "
    "If the answer is not in the documents, say 'The provided documents do not contain enough information.' "
    "Provide a concise and clear response."
    "\n\n{context}"
)

# Print the text of each retrieved document to judge whether it is useful.
# NOTE(review): `retrieved_docs` still holds the result of the earlier
# "What is Acne?" query unless it has been reassigned since — confirm this is
# the set of documents you mean to inspect.
for doc in retrieved_docs:
    print(doc.page_content)

Isotretinoin (Accutane) is prescribed only for very
severe, disfiguring acne.
Acne is a skin condition that occurs when pores or
hair follicles become blocked. This allows a waxy
material, sebum, to collect inside the pores or follicles.
Normally, sebum flows out onto the skin and hair to
form a protective coating, but when it cannot get out,
small swellings develop on the skin surface. Bacteria
and dead skin cells can also collect that can cause
inflammation. Swellings that are small and not
inflamed are whiteheads or blackheads. When they
become inflamed, they turn into pimples. Pimples that
fill with pus are called pustules.
Acne cannot be cured, but acne drugs can help clear
the skin. Benzoyl peroxide and tretinoin work by mildly
irritating the skin. This encourages skin cells to slough
off, which helps open blocked pores. Benzoyl peroxide
also kills bacteria, which helps prevent whiteheads and
blackheads from turning into pimples. Isotretinoin
result after the plugged follicle is 