In [1]:
from langchain import PromptTemplate 
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

  from tqdm.autonotebook import tqdm


In [2]:
#Extract data from PDF 
def load_pdf(data):
    loader = DirectoryLoader(data, 
                        glob="*.pdf",
                        loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

In [3]:
extracted_data = load_pdf("C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data")

In [4]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [5]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 7020


In [6]:
PINECONE_API_KEY="ec7f1a2d-e8f6-4843-841f-a0d789cc86b9"

In [7]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

In [8]:
import time

index_name = "medical-chatbot4"  # change if desired

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [9]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")



In [10]:
query_result = embeddings.embed_query("Hello World")
print("Length", len(query_result))

Length 384


In [11]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [12]:
from uuid import uuid4

from langchain_core.documents import Document


In [13]:
uuids = [str(uuid4()) for _ in range(len(text_chunks))]

vector_store.add_documents(documents=text_chunks, ids=uuids)

['cc5b481c-eda5-4465-a6c1-91b1f173c8fe',
 '92a7a390-ebf3-4200-bfe9-5505fe0a8538',
 '191865e8-7d4a-4dbb-a60c-db325eed0ffe',
 'efa64d1a-a6f4-4d9f-b183-5d84c4effad9',
 'f5fcbdb4-4931-4285-914c-ab774dbf82fa',
 'e22cb752-5052-4429-ae02-1d3c27cff64f',
 '7f965875-45df-469e-9b61-8881886b3a62',
 'b4f72f38-640d-403b-b88c-5d50359a672a',
 '267cb429-25a2-4e34-86e3-88b0e1e11b2f',
 'a138c603-336f-4427-9e87-080e69c17fb4',
 '040e6421-d8fd-4d21-9a26-b92c2b1393d4',
 '7b1795f7-af7d-4c5e-9b8b-aef0ce6438e7',
 '722c56b3-8c24-42ca-9099-c86df8d9310c',
 'fa467565-3d5b-43a1-a6ef-fbb8648f6edc',
 '0f05dcd1-4023-4083-b827-95cd79dbdc3f',
 '6073be66-1127-464e-ac2e-ede6ea06962f',
 'f5f5e2cd-2f78-4fe6-9463-393463e9451c',
 '6880233c-c706-439c-8cfe-07d36ea79eba',
 '8208b2b9-06db-4245-bbd3-51dfc490d49c',
 'ddd4afde-9888-4c1e-92a0-d2ee050fbdf0',
 '1ce658e3-991a-441a-b6ad-062294099615',
 'f64c7393-a8d5-4fbb-9fa3-032a5475bca5',
 '5ecfb07f-07d0-4817-9aeb-49a650b1a9de',
 '6f6a6844-f6be-4dba-afc1-2267f68e06a1',
 'a9860550-8874-

In [14]:
index_stats = index.describe_index_stats()
print("Index Statistics:", index_stats)

Index Statistics: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 7312}},
 'total_vector_count': 7312}


In [15]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

retrieved_docs = retriever.invoke("What are Allergies?")

len(retrieved_docs)

3

In [16]:
retrieved_docs

[Document(metadata={'page': 130.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data\\Medical_book.pdf'}, page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE"),
 Document(metadata={'page': 135.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data\\Medical_book.pdf'}, page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'),
 Document(metadata={'page': 129.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data\\Medical_bo

In [17]:
print(retrieved_docs[0].page_content)

GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies
Allergic rhinitis is commonly triggered by
exposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.
The presence of an allergen causes the
body's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.
IgE molecules attach to mast
cells, which contain histamine.HistaminePollen grains
Lymphocyte
FIRST EXPOSURE


In [18]:
print(retrieved_docs[1].page_content)

the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .
The particular allergens to which a person is sensi-


In [19]:
print(retrieved_docs[2].page_content)

allergens are the following:
• plant pollens
• animal fur and dander
• body parts from house mites (microscopic creatures
found in all houses)
• house dust• mold spores• cigarette smoke• solvents• cleaners
Common food allergens include the following:
• nuts, especially peanuts, walnuts, and brazil nuts
• fish, mollusks, and shellfish• eggs• wheat• milk• food additives and preservatives
The following types of drugs commonly cause aller-
gic reactions:
• penicillin or other antibiotics


In [20]:
# from huggingface_hub import login
# login(token = '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')


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\TIFFANY MUN\.cache\huggingface\token
Login successful


In [39]:
# llm = CTransformers(model=r"C:\Users\TIFFANY MUN\Medical-Chatbot-2\model\llama-2-7b-chat.ggmlv3.q4_0.bin",
#                     model_type="llama",
#                     config={'max_new_tokens': 150, 'temperature': 0.5})

In [47]:
import getpass
import os

os.environ["OPENAI_API_KEY"] = getpass.getpass()

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")

In [59]:
# from langchain_core.prompts import ChatPromptTemplate

# system_prompt = (
#     "You are an assistant for question-answering tasks. "
#     "Use the following pieces of retrieved context to answer "
#     "the question. If you don't know the answer, say that you "
#     "don't know. Use three sentences maximum and keep the "
#     "answer concise."
#     "\n\n"
#     "{context}"
# )

# PROMPT = ChatPromptTemplate.from_messages(
#     [
#         ("system", system_prompt),
#         ("human", "{input}"),
#     ]
# )

In [23]:
# from langchain.chains import create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain

In [24]:
# question_answer_chain = create_stuff_documents_chain(llm, PROMPT)

In [25]:
# rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [26]:
# response = rag_chain.invoke({"input": "What is Acne?"})
# response

In [27]:
# print(response["answer"])

In [73]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""


custom_rag_prompt = PromptTemplate.from_template(template)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

result = rag_chain.invoke("What are Allergens?")
print(result)

Allergens are substances that can trigger allergic reactions in the body, such as plant pollens, animal fur, and certain foods like nuts and fish. Thanks for asking!


In [53]:
while True:
    user_input = input("Input Prompt: ")

    if user_input.lower() in ['exit', 'quit']:
        print("Exiting the loop.")
        break

    try:
        # Directly pass the user input string
        print(f"Querying with: {user_input}")
        
        result = rag_chain.invoke(user_input)
        
        # Print the result
        print("Response: ", result)
    except Exception as e:
        # Print any exceptions that occur
        print(f"An error occurred: {e}")


Querying with: how do i know if i have athlete's foot?
Response:  Symptoms of athlete's foot include itchy, sore skin on the toes, with scaling, cracking, inflammation, and blisters. Blisters that break, exposing raw patches of tissue, can cause pain and swelling. If the itching and burning worsen, it may indicate athlete's foot. Thanks for asking!
Exiting the loop.


In [67]:
formatted_docs = format_docs(retrieved_docs)
print(formatted_docs)  # Print formatted documents to verify


GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies
Allergic rhinitis is commonly triggered by
exposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.
The presence of an allergen causes the
body's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.
IgE molecules attach to mast
cells, which contain histamine.HistaminePollen grains
Lymphocyte
FIRST EXPOSURE

the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .
The particular allergens to which a person is sensi-

allergens are the following:
• plant pollens
• animal fur and dander
• body parts from house mites (microscopic creatures
found in all houses)
• house dust• mold spores• cigarette smoke• solvents• cleaners
Common food allergens include the following:
• nuts, especially peanuts, walnuts, and brazil nuts
• fish, mollusks, and shellfish• eggs• wheat• milk• food additives and p

In [74]:
query = "What are Allergens?"
retrieved_docs = retriever.invoke(query)
formatted_docs = format_docs(retrieved_docs)
prompt = custom_rag_prompt.format(context=formatted_docs, question=query)
print("Prompt:", prompt)  # Print the prompt to see if it includes the context

response = llm(prompt)  # Get the LLM response
print("LLM Response:", response)


Prompt: Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

allergens are the following:
• plant pollens
• animal fur and dander
• body parts from house mites (microscopic creatures
found in all houses)
• house dust• mold spores• cigarette smoke• solvents• cleaners
Common food allergens include the following:
• nuts, especially peanuts, walnuts, and brazil nuts
• fish, mollusks, and shellfish• eggs• wheat• milk• food additives and preservatives
The following types of drugs commonly cause aller-
gic reactions:
• penicillin or other antibiotics

and the offend-ing substance is called an allergen. Common inhaledallergens include pollen, dust, and insect parts from tinyhouse mites. Common food allergens include nuts, fish,and milk.

When thisoccurs, an

In [72]:
while True:
    user_input = input("Input Prompt: ")

    if user_input.lower() in ['exit', 'quit']:
        print("Exiting the loop.")
        break

    try:
        # Retrieve relevant documents based on the user input
        retrieved_docs = retriever.invoke(user_input)
        
        # Format the retrieved documents
        formatted_docs = format_docs(retrieved_docs)
        
        # Create the prompt using the formatted context
        prompt = custom_rag_prompt.format(context=formatted_docs, question=user_input)
        print("Prompt:", prompt)  # Print the prompt to see if it includes the context
        
        # Get the LLM response
        response = llm(prompt)
        
        # Print the result
        print("Response:", response)
        
    except Exception as e:
        # Print any exceptions that occur
        print(f"An error occurred: {e}")


Prompt: Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

which the skin becomes itchy and sore, cracking andpeeling away. Athlete’s foot (also known as tinea pedis orfoot ringworm ) can be treated, but it can be tenacious
and difficult to clear up completely.
Description
Athlete’s foot is a very common condition of itchy,

they flourish in and around swimming pools, showers,and locker rooms. Tinea pedis got its common namebecause the infection was common among athletes whooften used these areas.
Causes and symptoms
Athlete’s foot is caused by a fungal infection that
most often affects the fourth and fifth toe webs. Trichophy-
ton rubrum ,T. mentagrophytes , and Epidermophyton floc-
cosum , the fungi that cause athlete’s foot, are unusual in

abs