In [94]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
#from langchain.vectorstores import Pinecoe
#from pinecone import Pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from ctransformers import AutoModelForCausalLM
from langchain.llms import CTransformers

In [42]:
import os
import warnings

# Configuration for the Pinecone vector index used by this notebook.
# The API key is a secret and must not be stored in the notebook: it is read from the
# PINECONE_API_KEY environment variable, which should be set before the kernel starts.
# Any key that was previously committed in this file should be revoked and rotated.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
if not PINECONE_API_KEY:
    # Warn instead of raising so the remaining (non-Pinecone) cells can still be explored.
    warnings.warn("PINECONE_API_KEY is not set; Pinecone calls will fail until it is provided.")
PINECONE_API_ENV = "gcp-starter"
INDEX_NAME = "medicbot"

In [46]:
import os

# Imported here because the top-of-notebook import is commented out; without it this cell
# fails with NameError on a fresh kernel.
from pinecone import Pinecone

# Expose the key through the environment as well as passing it explicitly to the client below.
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY

### Initializing the client (second way): explicit client object plus a handle to the index
pc = Pinecone(api_key=PINECONE_API_KEY)
# Use the shared constant so the index name is defined in exactly one place.
index = pc.Index(INDEX_NAME)

In [4]:
# Load every PDF found in a directory
def load_pdf(data):
    """Load all ``*.pdf`` files matched inside ``data``.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files,
        with each file parsed by ``PyPDFLoader``.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [5]:
# Raw string: the backslashes of the Windows path are taken literally instead of being
# parsed as (invalid) escape sequences such as "\M" and "\d". The path value is unchanged.
extracted_data = load_pdf(r"D:\MedicBot\data")

In [6]:
# extracted_data

In [7]:
# Break the loaded documents into overlapping chunks
def text_split(extracted_data):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    The splitter is configured with ``chunk_size=1000`` and ``chunk_overlap=50``.

    Args:
        extracted_data: Documents to pass to ``split_documents``.

    Returns:
        The result of ``split_documents`` on ``extracted_data``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=1000)
    return splitter.split_documents(extracted_data)

In [8]:
# Split the loaded documents into chunks and report how many were produced.
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 3276


In [9]:
# text_chunks

In [10]:
# Load the sentence-embedding model
def download_hugging_face_embeddings():
    """Create the embedding wrapper used throughout the notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)

In [11]:
# Instantiate the embedding model once; the cells below reuse this object.
embeddings = download_hugging_face_embeddings()

In [14]:
# Display the embeddings object; a bare last expression renders its repr.
# (The previous trailing dot was a syntax error.)
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [12]:
# Sanity check: embed a short string and print the length of the returned vector.
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [50]:
# Use the synchronous embed_query so `vec` is the embedding vector itself.
# aembed_query is the async variant: calling it without `await` only yields a coroutine object.
vec = embeddings.embed_query('key terms of Acetaminophe')

In [17]:
# query_result

In [41]:
from langchain_pinecone import PineconeVectorStore

In [48]:
# Build the vector store from the text chunks using the embedding model, targeting the
# index named by INDEX_NAME.
# NOTE(review): presumably this uploads every chunk again each time the cell runs — confirm
# whether an already-populated index should be reused instead.
docsearch = PineconeVectorStore.from_documents(text_chunks, embeddings, index_name=INDEX_NAME)

In [51]:
# Smoke-test retrieval: ask the vector store for the 3 most similar chunks to a sample question.
query = "What are Allergies"

docs=docsearch.similarity_search(query, k=3)

print("Result", docs)

Result [Document(page_content='Allergy is a reaction of the immune system. Nor-\nmally, the immune system responds to foreign microor-ganisms and particles, like pollen or dust, by producingspecific proteins called antibodies that are capable ofbinding to identifying molecules, or antigens, on theforeign organisms. This reaction between antibody andantigen sets off a series of reactions designed to protectthe body from infection. Sometimes, this same series ofreactions is triggered by harmless, everyday substances.This is the condition known as allergy, and the offend-ing substance is called an allergen. Common inhaledallergens include pollen, dust, and insect parts from tinyhouse mites. Common food allergens include nuts, fish,and milk.\nAllergic reactions involve a special set of cells in', metadata={'page': 135.0, 'source': 'D:\\MedicBot\\data\\Medical_book.pdf'}), Document(page_content='Description\nAllergies are among the most common of medical\ndisorders. It is estimated that 60 

In [52]:
# Prompt text for the QA chain. It has two placeholders, {context} and {question},
# and instructs the model to admit when it does not know the answer.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [53]:
# Wrap the raw template text in a PromptTemplate and package it as the
# keyword arguments handed to the QA chain.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [98]:
import os

# Location of the local quantized Llama 2 chat model file.
# Raw string so the Windows backslashes are not parsed as (invalid) escape sequences;
# the path value itself is unchanged.
path = r"D:\MedicBot\model\llama-2-7b-chat.ggmlv3.q2_K.bin"

# Fail early with a precise message instead of the opaque
# "Failed to create LLM" RuntimeError raised when loading cannot proceed.
if not os.path.isfile(path):
    raise FileNotFoundError(f"Model file not found: {path}")

# Bound to `llm` (previously misspelled `lllm`), which is the name the
# RetrievalQA cell passes as its `llm=` argument.
llm = CTransformers(
    model=path,
    model_type="llama",
    config={
        'max_new_tokens': 512,
        'temperature': 0.8,
    },
)

RuntimeError: Failed to create LLM 'llama' from 'D:\MedicBot\model\llama-2-7b-chat.ggmlv3.q2_K.bin'.

In [23]:
# Assemble the retrieval-augmented QA chain: the retriever is asked for 2 chunks per
# query, the "stuff" chain type is used with the custom prompt, and the source
# documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs=dict(k=2)),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Interactive question/answer loop over the retrieval chain.
# Submit an empty line, or type "exit" / "quit", to leave the loop; EOF or an
# interrupt while waiting for input also ends it cleanly instead of raising.
while True:
    try:
        user_input = input("Input Prompt:")
    except (EOFError, KeyboardInterrupt):
        break
    if user_input.strip().lower() in {"", "exit", "quit"}:
        break
    result = qa({"query": user_input})
    print("Response : ", result["result"])