In [1]:
print("Medical ChatBot")

Medical ChatBot


In [35]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Pinecone as PineconeStore
from pinecone import Pinecone, ServerlessSpec
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain_groq import ChatGroq
from langchain_pinecone import PineconeEmbeddings
import os

### ***Setting the API Key***

### ***Creating the Pinecone Index***

In [37]:
pinecone = Pinecone(
    api_key=os.environ['PINECONE_API_KEY']
)

In [12]:
index_name = "medical-chatbot"

pinecone.create_index(
    name=index_name,
    dimension=1024, # Replace with your model dimensions
    metric="cosine", # Replace with your model metric
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    )
)

### ***Load Data***

In [14]:
def load_pdf(pdf_file):
    loader = DirectoryLoader(pdf_file,
                             glob = "*.pdf",
                             loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

In [16]:
extracted_data = load_pdf('Data/')

In [17]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks


In [18]:
text_chunks = text_split(extracted_data)

In [19]:
len(text_chunks)

3248

In [20]:
print(text_chunks[0])

page_content='TheGALE
ENCYCLOPEDIA
ofMEDICINE
SECOND EDITION' metadata={'source': 'Data\\Medical_book.pdf', 'page': 1}


### ***Converting Text chunks to Embeddings***

In [38]:
embeddings = PineconeEmbeddings(model="multilingual-e5-large")

In [39]:
# Chunks converted to embeddings and loaded to pinecone dataset
docsearch = PineconeStore.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [41]:
docsearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x1be4e98cd10>

In [42]:
query = 'What is allergies'

In [43]:
docs = docsearch.similarity_search(query, k =3)

In [44]:
print(docs)

[Document(metadata={}, page_content='mally, the immune system responds to foreign microor-ganisms, or particles, like pollen or dust, by producingspecific proteins, called antibodies, that are capable ofbinding to identifying molecules, or antigens, on the for-eign particle. This reaction between antibody and antigensets off a series of reactions designed to protect the bodyfrom infection. Sometimes, this same series of reactionsis triggered by harmless, everyday substances. This is thecondition known as allergy, and the offending substanceis called an allergen.\nLike all allergic reactions, AR involves a special'), Document(metadata={}, page_content='mally, the immune system responds to foreign microor-ganisms and particles, like pollen or dust, by producingspecific proteins called antibodies that are capable ofbinding to identifying molecules, or antigens, on theforeign organisms. This reaction between antibody andantigen sets off a series of reactions designed to protectthe body fro

In [45]:
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer. just say that you don't know the answer don't try to make a irrelevant answer.

Context : {context}
Question : {question}

Only return the helpful answer below and mothing else.
Helpful Answer : 
"""

In [46]:
Prompt = PromptTemplate(template = prompt_template, input_variables = ['context','question'])
chain_type_kwargs = {'prompt':Prompt}

In [47]:
llm = ChatGroq()

Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x000001BE5759D760>


In [53]:
qa = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = 'stuff',
    retriever = docsearch.as_retriever(search_kwargs = {'k':1}),
    return_source_documents = True,
    chain_type_kwargs = chain_type_kwargs
)

In [58]:
output = qa('What is acne')

In [61]:
result = output['result']

In [60]:
source_documents = output['source_documents']


In [63]:
print("Answer : ", result)
print("Source : ", source_documents)

Answer :  Acne is a skin condition that occurs when pores or hair follicles become blocked, leading to the accumulation of sebum, bacteria, and dead skin cells. This can result in various types of swellings on the skin surface, such as whiteheads, blackheads, and pimples. While acne cannot be cured, it can be managed with the use of acne drugs like benzoyl peroxide, tretinoin, and isotretinoin. Benzoyl peroxide works by mildly irritating the skin and killing bacteria, which helps prevent blocked pores from turning into pimples.
Source :  [Document(metadata={}, page_content='Acne is a skin condition that occurs when pores or\nhair follicles become blocked. This allows a waxymaterial, sebum, to collect inside the pores or follicles.Normally, sebum flows out onto the skin and hair toform a protective coating, but when it cannot get out,small swellings develop on the skin surface. Bacteriaand dead skin cells can also collect that can causeinflammation. Swellings that are small and notinfla