In [15]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
!pip install langchain sentence_transformers chromadb llama-cpp-python langchain_community pypdf



In [17]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA, LLMChain

In [18]:
loader = PyPDFDirectoryLoader("/content/drive/MyDrive/HealthyHeart Chatbot")
docs = loader.load()

In [19]:
len(docs) #number of pages

95

In [20]:
docs[10]

Document(metadata={'source': '/content/drive/MyDrive/HealthyHeart Chatbot/healthyheart.pdf', 'page': 10}, page_content='6\ndiscussing them openly can help your doc tor find out your chances of\ndeveloping heart disease. It can also help your doctor work more effec-tively with you to reduce your risk.\nKeep it simple. If you don’t understand something your doctor says,\nask for an explanation in plain language. Be especially sure youunderstand why and how to take any medication you’re given. Ifyou are worried about understanding what the doctor says, or if youhave trouble hearing, bring a friend or relative with you to yourappointment. You may want to ask that person to write down thedoctor’s instructions for you.Your Guide to a Healthy HeartQuestions To\nAsk Your Doctor\nGetting answers to these questions will give you important infor-\nmation about your heart health and what you can do to improveit. You may want to bring this list to your doctor’s office. \n1. What is my risk for hear

In [21]:
text_splitter=RecursiveCharacterTextSplitter(chunk_size=300,chunk_overlap=50 )
chunks = text_splitter.split_documents(docs)


In [22]:
len(chunks)

747

In [23]:
chunks[6]

Document(metadata={'source': '/content/drive/MyDrive/HealthyHeart Chatbot/healthyheart.pdf', 'page': 3}, page_content='What’s Your Risk? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5\nHow To Talk With Your Doctor . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5')

In [24]:
chunks[7]

Document(metadata={'source': '/content/drive/MyDrive/HealthyHeart Chatbot/healthyheart.pdf', 'page': 3}, page_content='Major Risk Factors. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11')

In [25]:
import os
os.environ['HUGGINGFACEHUB_API_HUB']="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"

In [26]:
embeddings=SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")

In [27]:
vectorstore = Chroma.from_documents(chunks,embeddings)

In [28]:
query = "Who is at risk of heart disease?"
search_results = vectorstore.similarity_search(query)

In [29]:
search_results

[Document(metadata={'page': 8, 'source': '/content/drive/MyDrive/HealthyHeart Chatbot/healthyheart.pdf'}, page_content='While each risk factor increases your risk of heart disease, having'),
 Document(metadata={'page': 5, 'source': '/content/drive/MyDrive/HealthyHeart Chatbot/healthyheart.pdf'}, page_content='■As early as age 45, a man’s risk of heart disease begins to rise significantly. For a woman, risk starts to increase at age 55.\n■Fifty percent of men and 64 percent of women who die suddenlyof heart disease have no previous symptoms of the disease.1Heart Disease: Why Should You Care?Heart Disease:'),
 Document(metadata={'page': 6, 'source': '/content/drive/MyDrive/HealthyHeart Chatbot/healthyheart.pdf'}, page_content='factor greatly increases the chances of developing heart disease.Moreover, the worse a particular risk factor is, the more likely youare to develop heart disease. For example, if you have high bloodpressure, the higher it is, the greater your chances of developingh

In [30]:
retriever = vectorstore.as_retriever(search_kwargs={'k':3})

In [31]:
retriever.get_relevant_documents(query)

  warn_deprecated(


[Document(metadata={'page': 8, 'source': '/content/drive/MyDrive/HealthyHeart Chatbot/healthyheart.pdf'}, page_content='While each risk factor increases your risk of heart disease, having'),
 Document(metadata={'page': 5, 'source': '/content/drive/MyDrive/HealthyHeart Chatbot/healthyheart.pdf'}, page_content='■As early as age 45, a man’s risk of heart disease begins to rise significantly. For a woman, risk starts to increase at age 55.\n■Fifty percent of men and 64 percent of women who die suddenlyof heart disease have no previous symptoms of the disease.1Heart Disease: Why Should You Care?Heart Disease:'),
 Document(metadata={'page': 6, 'source': '/content/drive/MyDrive/HealthyHeart Chatbot/healthyheart.pdf'}, page_content='factor greatly increases the chances of developing heart disease.Moreover, the worse a particular risk factor is, the more likely youare to develop heart disease. For example, if you have high bloodpressure, the higher it is, the greater your chances of developingh

In [32]:
llm= LlamaCpp(
    model_path="/content/drive/MyDrive/HealthyHeart Chatbot/Data/ggml-model-Q4_K_M.gguf",
    temperature=0.2,
    max_tokens = 2048,
    top_p=1
)

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /content/drive/MyDrive/HealthyHeart Chatbot/Data/ggml-model-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = models
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:           

In [33]:
template="""
<|context|>
You are a medical assistant thatr follows the instructions and generate the acurate response based on the query and the context provided.
Please be truthful and give direct answers.
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

In [34]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate

In [35]:
prompt= ChatPromptTemplate.from_template(template)

In [36]:
rag_chain = (
    {"context": retriever, "query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [37]:
response=rag_chain.invoke(query)


llama_print_timings:        load time =    4720.69 ms
llama_print_timings:      sample time =      31.46 ms /    48 runs   (    0.66 ms per token,  1525.50 tokens per second)
llama_print_timings: prompt eval time =   43436.73 ms /    76 tokens (  571.54 ms per token,     1.75 tokens per second)
llama_print_timings:        eval time =   38031.62 ms /    47 runs   (  809.18 ms per token,     1.24 tokens per second)
llama_print_timings:       total time =   81582.46 ms /   123 tokens


In [39]:
response

'The risk factors for heart disease include high blood pressure, high cholesterol levels, smoking, a family history of heart disease, diabetes, overweight and obesity, lack of physical activity, and the consumption of alcohol.'

In [None]:
import sys

while True:
  user_input = input(f"Input query: ")
  if user_input == 'exit':
    print("Exiting...")
    sys.exit()
  if user_input=="":
    continue
  result = rag_chain.invoke(user_input)
  print("Answer: ", result)

Input query: What are diseases that effect heart health


Llama.generate: 57 prefix-match hit, remaining 18 prompt tokens to eval

llama_print_timings:        load time =    4720.69 ms
llama_print_timings:      sample time =      44.03 ms /    59 runs   (    0.75 ms per token,  1339.90 tokens per second)
llama_print_timings: prompt eval time =   10682.24 ms /    18 tokens (  593.46 ms per token,     1.69 tokens per second)
llama_print_timings:        eval time =   48633.66 ms /    58 runs   (  838.51 ms per token,     1.19 tokens per second)
llama_print_timings:       total time =   59436.92 ms /    76 tokens


Answer:  Heart failure, coronary artery disease, hypertension, atherosclerosis, myocardial infarction, arrhythmia, cardiomyopathy, and congenital heart defects. These are some of the diseases that affect heart health.
