In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from pinecone import Pinecone as pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers
from dotenv import load_dotenv
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

  from tqdm.autonotebook import tqdm


In [2]:
load_dotenv()

True

In [3]:
api_key = os.getenv("PINECONE_API_KEY")
env_key = os.getenv("PINECONE_API_ENV")
index_pinecone = os.getenv("PINECONE_INDEX")

In [4]:
#function to load the dataset
def load_data(data):
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

In [5]:
extracted_data = load_data("D:\Projects to practice\MedicalChatbot\data")

In [6]:
def text_splitter(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [7]:
text_chunks = text_splitter(extracted_data)
print("text chunk size is: ", len(text_chunks))

text chunk size is:  7020


In [8]:
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [9]:
embeddings = download_hugging_face_embeddings()

In [10]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [11]:
#test
query_result = embeddings.embed_query("this is a test")
print("length", len(query_result))

length 384


In [12]:
print(index_pinecone)

medchatbotindex


In [14]:
#initializing pinecone
pc=pinecone(api_key=api_key)
docsearch=Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_pinecone)

In [15]:
#testing pinecone
docsearch_1 = Pinecone.from_existing_index(index_pinecone, embeddings)

In [16]:
query = "what is blood test"
docs = docsearch_1.similarity_search(query, k=3)

In [17]:
print(docs)

[Document(page_content='Blood gas analysis\nDefinition\nBlood gas analysis, also called arterial blood gas\n(ABG) analysis, is a test which measures the amounts ofoxygen and carbon dioxide in the blood, as well as theacidity (pH) of the blood.\nPurpose\nAn ABG analysis evaluates how effectively the'), Document(page_content='Blood typing and\ncrossmatching\nDefinition\nBlood typing is a laboratory test done to determine a\nperson’s blood type. If the person needs a blood transfu-\nsion, another test called crossmatching is done after the\nblood is typed to find blood from a donor that the per-son’s body will accept.\nPurpose\nBlood typing and crossmatching are most common-'), Document(page_content='Blood tests\nAlthough tests designed to detect a specific protein')]


In [18]:
#prompting starts
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [19]:
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt":PROMPT}

In [20]:
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"

In [21]:
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [22]:
model_path

'C:\\Users\\ASUS\\.cache\\huggingface\\hub\\models--TheBloke--Llama-2-13B-chat-GGML\\snapshots\\3140827b4dfcb6b562cd87ee3d7f07109b014dd0\\llama-2-13b-chat.ggmlv3.q5_1.bin'

In [23]:
llm = CTransformers(model=model_path, model_type="llama", config={'max_new_tokens':512, 'temperature':0.8})

In [28]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [29]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response : ", result["result"])


  warn_deprecated(


Response :  Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria.


KeyboardInterrupt: 