In [104]:
%pwd

'c:\\Users\\Asus\\OneDrive\\Desktop\\Medical-ChatBot'

In [74]:
import os
os.chdir("Medical-ChatBot")

In [105]:
%pwd

'c:\\Users\\Asus\\OneDrive\\Desktop\\Medical-ChatBot'

In [76]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [77]:
#Extract Data From the PDF File
def load_pdf_file(data):
    loader= DirectoryLoader(data,
                            glob="*.pdf",
                            loader_cls=PyPDFLoader)

    documents=loader.load()

    return documents

In [103]:
extracted_data = load_pdf_file('Data/')

In [18]:
#extracted_data

In [79]:
#Split the Data into Text Chunks
def text_split(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

In [80]:
text_chunks=text_split(extracted_data)
print("Length of Text Chunks", len(text_chunks))

Length of Text Chunks 5860


In [81]:
from langchain.embeddings import HuggingFaceEmbeddings

In [82]:
#Download the Embeddings from Hugging Face
def download_hugging_face_embeddings():
    embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [83]:
embeddings = download_hugging_face_embeddings()

In [None]:
pip install --upgrade huggingface_hub sentence-transformers langchain langchain-community transformers

In [84]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [86]:
PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')

In [None]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "medibot"


pc.create_index(
    name=index_name,
    dimension=384, 
    metric="cosine", 
    spec=ServerlessSpec(
        cloud="aws", 
        region="us-east-1"
    ) 
) 

In [89]:
# Embed each chunk and upsert the embeddings into your Pinecone index.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings, 
)

In [90]:
# Load Existing index 

from langchain_pinecone import PineconeVectorStore
# Embed each chunk and upsert the embeddings into your Pinecone index.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)

In [91]:
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x20f801c79a0>

In [92]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [93]:
retrieved_docs = retriever.invoke("What is Acne?")

In [94]:
retrieved_docs

[Document(id='caca616f-e7ae-4844-bd05-c66cd61b7547', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'page_label': '40', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='cdfa92a0-b482-4fe8-b614-a99a8ea3b457', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'page_label': '40', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='f81f33b3-c553-4a64-8f01-179694db4c67', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 38.0, 

In [95]:
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

# Load API key from .env file
load_dotenv()

# Retrieve API key from environment variables
api_key = os.getenv("GEMINI_API_KEY")

# Ensure the key is loaded correctly
if not api_key:
    raise ValueError("GOOGLE_API_KEY not found. Make sure it's set in the .env file.")

# Initialize Gemini model with the API key
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.4, max_output_tokens=500, google_api_key=api_key)

In [100]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "You are an assistant for question-answering tasks. "
                  "Use the following retrieved context to answer the question. "
                  "If you don't know the answer, say that you don't know. "
                  "Use three sentences maximum and keep the answer concise."
                  "\n\nContext: {context}\n\nQuestion: {input}"),
    ]
)

In [101]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [69]:
import google.generativeai as genai

genai.configure(api_key=api_key)

response = genai.GenerativeModel("gemini-pro").generate_content("Hello! How are you?")
print(response.text)


I am an AI chatbot assistant. I do not have personal feelings or emotions, so I don't experience being good or bad. My purpose is to provide information and assist users to the best of my abilities. How can I assist you today?


In [102]:
response = rag_chain.invoke({"input": "what is Acromegaly and gigantism?"})
print(response["answer"])

Acromegaly is a disorder in which the abnormal release of a particular chemical from the pituitary gland in the brain causes increased growth in bone and soft tissue, as well as a variety of other disturbances throughout the body. Gigantism is a condition in which a person grows to an abnormally large size, typically due to a pituitary gland disorder that results in excessive growth hormone production.
