In [1]:
print("Medical Chatbot")

Medical Chatbot


In [2]:
from langchain import PromptTemplate
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.vectorstores import Pinecone as LangchainPinecone
from langchain_pinecone import PineconeVectorStore
from langchain.document_loaders import DirectoryLoader,PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers
import os
from pinecone import Pinecone, ServerlessSpec
from langchain_community.vectorstores import Pinecone
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


  from tqdm.autonotebook import tqdm


In [3]:
def load_pdf(data):
    loader=DirectoryLoader(data,glob="*.pdf",loader_cls=PyPDFLoader)
    documents=loader.load()
    return documents

In [4]:
extracted_data=load_pdf(r"C:\Users\sathv\Medical-Chatbot\data")

In [5]:
def text_split(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks


In [6]:
text_chunks=text_split(extracted_data)



In [7]:
def download_huggingface_embedding():
    embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embedding

In [8]:
embeddings=download_huggingface_embedding()

  embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [9]:
query_result=embeddings.embed_query("Hello world")

In [10]:
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

In [12]:
from pinecone import Pinecone
pinecone_api_key=PINECONE_API_KEY
pc = Pinecone(api_key=pinecone_api_key)

index_name = "medical-chatbot"

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws", 
            region="us-east-1"
        )
    )

index = pc.Index(index_name)

docsearch=PineconeVectorStore.from_documents(documents=text_chunks,embedding=embeddings,index_name=index_name)
docsearch=PineconeVectorStore.from_existing_index(index_name=index_name,embedding=embeddings)


In [13]:
query="What are Allergies"
docs=docsearch.similarity_search(query,k=3)
print(docs)

[Document(metadata={'page': 659.0, 'source': 'C:\\Users\\sathv\\Medical-Chatbot\\data\\Medical_book.pdf'}, page_content='ORGANIZATIONS\nAmerican Academy of Ophthalmology. 655 Beach Street, PO\nBox 7424, San Francisco, CA 94120-7424. <http://www.\neyenet.org>.\nKEY TERMS\nAllergen —A substance capable of inducing an\nallergic response.\nAllergic reaction—An immune system reaction to\na substance in the environment; symptoms\ninclude rash, inflammation, sneezing, itchy watery\neyes, and runny nose.\nConjunctiva—The mucous membrane that covers\nthe white part of the eyes and lines the eyelids.'), Document(metadata={'page': 659.0, 'source': 'C:\\Users\\sathv\\Medical-Chatbot\\data\\Medical_book.pdf'}, page_content='ORGANIZATIONS\nAmerican Academy of Ophthalmology. 655 Beach Street, PO\nBox 7424, San Francisco, CA 94120-7424. <http://www.\neyenet.org>.\nKEY TERMS\nAllergen —A substance capable of inducing an\nallergic response.\nAllergic reaction—An immune system reaction to\na substance in

In [14]:
prompt_template="""
Use the following pieces of information to answer user's questions
If you don't know the answer, just say that you don't know, don't try to make up an answer
Context:{context}
Question:{question}
Only return helpful answers and nothing else
Helpful answer:
"""

In [15]:
prompt=PromptTemplate(template=prompt_template,input_variables=["context","question"])
chain_type_kwargs={"prompt":prompt}

In [16]:
llm=CTransformers(model=r"C:\Users\sathv\Medical-Chatbot\model\llama-2-13b-chat.ggmlv3.q6_K.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})


In [17]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retrieved_docs=retriever.invoke("What is Acne?")

In [18]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [19]:
chain_type_kwargs = {}

qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,    
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [20]:
user_input=input(f"Input prompt:")
result=qa({"query":user_input})
print("Response:",result["result"])

  result=qa({"query":user_input})


Response:  Acne is a skin condition caused by allergic reactions to certain substances such as corticosteriods and can be identified using a patch test. 
