In [1]:
from langchain import PromptTemplate
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone,ServerlessSpec
from langchain.document_loaders import DirectoryLoader, PyPDFLoader  , PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.vectorstores import Pinecone
from langchain.chains.question_answering import load_qa_chain
from langchain_google_genai import ChatGoogleGenerativeAI


import os


  from tqdm.autonotebook import tqdm


In [2]:
ROOT_DIR = os.path.abspath('..')
# data_path = r"..\..\C:\Users\Administrator\Documents\Langchian_tasks\M_bot_st\data\medical_book.pdf"


In [3]:

PINECONE_INDEX_NAME = "medical-chatbot"
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [4]:
def load_data(data_path):
    print(data_path)
    loader = PyPDFDirectoryLoader(data_path)
    data = loader.load()
    return data

docs = load_data(ROOT_DIR)
len(docs)

c:\Users\Administrator\Documents\Langchian_tasks\M_bot_st


759

In [5]:
def text_split(data):
    splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap= 20)
    text_chunks = splitter.split_documents(data)
    return text_chunks

text_chunks = text_split(docs)
print(len(text_chunks))

6970


In [31]:
text_chunks[:5]

[Document(metadata={'source': 'c:\\Users\\Administrator\\Documents\\Langchian_tasks\\M_bot_st\\data\\medical_book.pdf', 'page': 0}, page_content='The GALE\nENCYCLOPEDIA\nof MEDICINE\nSECOND EDITION'),
 Document(metadata={'source': 'c:\\Users\\Administrator\\Documents\\Langchian_tasks\\M_bot_st\\data\\medical_book.pdf', 'page': 1}, page_content='The G ALE\nENCYCLOPEDIA\nof M EDICINE\nSECOND EDITION\nJACQUELINE L. LONGE, EDITOR\nDEIRDRE S. BLANCHFIELD, ASSOCIATE EDITOR\nVOLUME\nC-F\n2'),
 Document(metadata={'source': 'c:\\Users\\Administrator\\Documents\\Langchian_tasks\\M_bot_st\\data\\medical_book.pdf', 'page': 2}, page_content='STAFF\nJacqueline L. Longe,Project Editor\nDeirdre S. Blanchfield, Associate Editor\nChristine B. Jeryan, Managing Editor\nDonna Olendorf, Senior Editor\nStacey Blachford, Associate Editor\nKate Kretschmann, Melissa C. McDade, Ryan\nThomason, Assistant Editors\nMark Springer, Technical Specialist\nAndrea Lopeman, Programmer/Analyst\nBarbara J. Yarrow,Manager, I

In [7]:

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
docsearch = Pinecone.from_existing_index(index_name=PINECONE_INDEX_NAME, embedding=embeddings)
print("Index successfully loaded.")


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Index successfully loaded.


In [8]:
vector  = embeddings.embed_query("what is Acne")
len(vector)

384

In [None]:
vectorstore_from_docs = PineconeVectorStore.from_documents(
        text_chunks,
        index_name=PINECONE_INDEX_NAME,
        embedding=embeddings
    )

In [9]:
docsearch = PineconeVectorStore.from_existing_index(PINECONE_INDEX_NAME,embeddings)

In [11]:
query = "What are aCNE ?"
docs = docsearch.similarity_search(query,k=3)
print(docs)

[Document(id='6932ab45-de86-4274-9b48-61dee221a2a5', metadata={'page': 298.0, 'source': 'c:\\Users\\Administrator\\Documents\\Langchian_tasks\\Medical-Chatbot\\data\\medical_book.pdf'}, page_content='Corticosteroids—A group of anti-inflammatory\nsubstances often used to treat skin conditions.\nImmune response—The protective reaction by the\nimmune system against foreign antigens (sub-\nstances that the body perceives as potentially dan-\ngerous). The immune system combats disease by\nneutralizing or destroying antigens.\ncontact dermatitis becomes a chronic and disabling con-\ndition that can have a profound effect on employability\nand quality of life.\nPrevention'), Document(id='4265ab24-bb20-4270-9438-37335d9f040d', metadata={'page': 298.0, 'source': 'c:\\Users\\Administrator\\Documents\\Langchian_tasks\\Medical-Chatbot\\data\\medical_book.pdf'}, page_content='Corticosteroids—A group of anti-inflammatory\nsubstances often used to treat skin conditions.\nImmune response—The protectiv

In [12]:
prompt_template = """
Use the given information context to give appropriate answer for the user's question.
If you don't know the answer, just say that you know the answer, but don't make up an answer.
Context: {context}
Question: {question}
Only return the appropriate answer and nothing else.
Helpful answer:
"""

In [13]:
prompt = PromptTemplate(template=prompt_template,input_variables=["context","question"])
chain_type_kwargs = {"prompt":prompt}
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="\nUse the given information context to give appropriate answer for the user's question.\nIf you don't know the answer, just say that you know the answer, but don't make up an answer.\nContext: {context}\nQuestion: {question}\nOnly return the appropriate answer and nothing else.\nHelpful answer:\n")

In [14]:
# config = {'max_new_tokens': 512, 'temperature': 0.5}
# llm = CTransformers(model='TheBloke/Llama-2-7B-Chat-GGML',model_file='llama-2-7b-chat.ggmlv3.q4_0.bin',model_type='llama' ,config=config)

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.5, max_tokens=None, timeout=None)
chain = load_qa_chain(llm , chain_type="stuff")

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain = load_qa_chain(llm , chain_type="stuff")


In [15]:
def retrive_query(query):
    matching_results = docsearch.similarity_search(query,k=10)
    return matching_results


def retrive_answer(query):
    doc_search  = retrive_query(query)
    print(doc_search)
    response = chain.run(input_documents = doc_search , question = query)
    return response

In [16]:
que = "what is Vitamins"
answer = retrive_answer(que)
print(answer)

[Document(id='2a2edc06-b18d-4835-9346-dd4674e15f42', metadata={'page': 732.0, 'source': 'c:\\Users\\Administrator\\Documents\\Langchian_tasks\\Medical-Chatbot\\data\\medical_book.pdf'}, page_content='food manufacturers to add folic acid to enriched bread\nand grain products to boost intake and to help prevent\nneural tube defects (NTD).\nPurpose\nFolic acid works together with vitamin B12 and vita-\nmin C to metabolize protein in the body. It is important\nfor the formation of red and white blood cells. It is neces-\nsary for the proper differentiation and growth of cells and\nfor the development of the fetus. It is also used to form\nthe nucleic acid of DNA and RNA. It increases the'), Document(id='c1142571-9774-4d4b-adea-c9717ca259a3', metadata={'page': 732.0, 'source': 'c:\\Users\\Administrator\\Documents\\Langchian_tasks\\Medical-Chatbot\\data\\medical_book.pdf'}, page_content='food manufacturers to add folic acid to enriched bread\nand grain products to boost intake and to help pr

  response = chain.run(input_documents = doc_search , question = query)


Vitamins are organic compounds that are essential for normal growth and nutrition and are required in small quantities in the diet because they cannot be synthesized by the body.  They are classified as either fat-soluble (vitamins A, D, E, and K) or water-soluble (B vitamins and vitamin C).



In [43]:
que = "name some vitamins which are reqired for human "
answer = retrive_answer(que)
print(answer)

[Document(id='021a10b6-e333-45f4-8291-7e3e7c68da2e', metadata={'page': 501.0, 'source': 'c:\\Users\\Administrator\\Documents\\Langchian_tasks\\Medical-Chatbot\\data\\medical_book.pdf'}, page_content='Alternative practitioners concentrate on good nutri-\ntion as a way to prevent heavy periods that are not caused\nby uterine fibroids, endometrial polyps, endometriosis, or\ncancer. Iron supplementation (100 mg per day) not only\nhelps prevent anemia, but also appears to reduce menor-\nrhagia in many women. Other recommended dietary sup-\nplements include vitamins A and C. Vitamin C improves\ncapillary fragility and enhances iron uptake.\nVitamin E and bioflavonoid supplements are also'), Document(id='453726bc-f935-4a1a-9980-e55c5a3ff7b0', metadata={'page': 501.0, 'source': 'c:\\Users\\Administrator\\Documents\\Langchian_tasks\\Medical-Chatbot\\data\\medical_book.pdf'}, page_content='Alternative practitioners concentrate on good nutri-\ntion as a way to prevent heavy periods that are not c