In [2]:
import os
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Pinecone
from pinecone import Pinecone as pin
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain_community.llms import CTransformers
import pypdf

In [3]:
# Read the Pinecone API key from the environment instead of embedding it in
# the notebook: anything committed here (including commented-out code) is
# visible to everyone with access to the file. A key that was previously
# stored in this cell should be treated as compromised and rotated.
#
# Set it before starting the kernel, e.g. `export PINECONE_API_KEY=...`.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "").strip()
if not PINECONE_API_KEY:
    # Fail here with a clear message rather than later inside a Pinecone call.
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Export it in the environment before "
        "running this notebook."
    )
# Keep the (whitespace-stripped) value in the environment so code that reads
# the variable directly sees the same key that this notebook uses.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

In [None]:
def load_pdfs(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan. Files are selected with the
            glob pattern ``*.pdf`` and parsed with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for that directory.
    """
    # DirectoryLoader is not part of the notebook's import cell, so the
    # original function raised NameError as soon as it was called. Import it
    # here so the function works regardless of which cells have been run.
    from langchain_community.document_loaders import DirectoryLoader

    loader = DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader,
    )
    return loader.load()

In [5]:
# Loader that reads the PDFs stored under the local "data" directory.
loader = PyPDFDirectoryLoader("data")

In [6]:
# Parse the PDFs via the directory loader created above; `docs` is the input
# to the text splitter further down.
docs = loader.load()

In [5]:
# Number of documents returned by the loader.
len(docs)

4005

In [7]:
# Splitter used to cut the loaded documents into overlapping chunks before
# embedding: chunk_size=500 with chunk_overlap=100 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=500,
)

In [8]:
# Split every loaded document into chunks; the chunks' page_content is what
# gets embedded and uploaded to Pinecone later on.
chunks = text_splitter.split_documents(docs)

In [9]:
def dwn_huggingface_embeddings():
    """Create the HuggingFace embedding wrapper used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

In [14]:
# Instantiate the embedding model once; the Pinecone cells below reuse it.
embeddings = dwn_huggingface_embeddings()

In [10]:
# Sanity check: length of the vector the model produces for a short query.
len(embeddings.embed_query("How are you"))

384

In [10]:
# Pinecone client (`pin` is the alias given to pinecone.Pinecone in the
# import cell) authenticated with the notebook's API key.
pc = pin(api_key=PINECONE_API_KEY)

In [11]:
# Name of the Pinecone index that stores the document embeddings.
index_name = "medical-chatbot"

In [12]:
# Handle to the Pinecone index named by `index_name`.
index = pc.Index(index_name)

In [None]:
# Embed the text of every chunk and store it in the Pinecone index, keeping
# the resulting vector store for retrieval. Only `page_content` is passed, so
# the chunks' metadata is not sent along.
docsearch = Pinecone.from_texts(
    texts=[ch.page_content for ch in chunks],
    embedding=embeddings,
    index_name=index_name,
)

In [15]:
# Re-create the Pinecone client and index name (same values as in the earlier
# cells) and list the indexes visible to this API key.
pc = pin(api_key=PINECONE_API_KEY)
index_name = "medical-chatbot"
pc.list_indexes()

{'indexes': [{'dimension': 384,
              'host': 'medical-chatbot-lc9nxwf.svc.apw5-4e34-81fa.pinecone.io',
              'metric': 'cosine',
              'name': 'medical-chatbot',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-west-2'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [12]:
index = pc.Index(index_name)

# Pinecone.from_texts embeds and upserts every chunk each time it is called,
# so re-running this cell against an index that already holds the corpus
# stores the same passages a second time (and pays for the embedding work
# again). Upload only when the index is empty; otherwise attach to the
# vectors that are already there.
index_stats = index.describe_index_stats()
if index_stats.total_vector_count == 0:
    docsearch = Pinecone.from_texts(
        [ch.page_content for ch in chunks],
        embeddings,
        index_name=index_name,
    )
else:
    docsearch = Pinecone.from_existing_index(index_name, embeddings)

In [13]:
# Retrieval smoke test: fetch the three stored chunks most similar to a
# sample question and print them.
query = "what are Allergies"
ans = docsearch.similarity_search(query=query, k=3)
print(ans)

[Document(page_content='by harmless, everyday substancessuch as pollen, dust, and animal danders. When thisoccurs, an allergy develops against the offending sub-stance (an allergen.)'), Document(page_content='Richard Robinson\nAllergies\nDefinition\nAllergies are abnormal reactions of the immune sys-\ntem that occur in response to otherwise harmless sub-stances.\nGALE ENCYCLOPEDIA OF MEDICINE 2 114AllergiesGEM - 0001 to 0432 - A  10/22/03 1:42 PM  Page 114'), Document(page_content='Mygund and R. M. Naclerio. Philadelphia: W. B. Saun-ders Co., 1993.\nLawlor, G. J. Jr., T. J. Fischer, and D. C. Adelman. Manual of\nAllergy and Immunology. Boston: Little, Brown and Co.,\n1995.\nNovick, N. L. You Can Do Something About Your Allergies.\nNew York: Macmillan, 1994.\nWeil, A. Natural Health, Natural Medicine: A Comprehensive\nManual for Wellness and Self-Care. New York: Houghton\nMifflin, 1995.\nRichard Robinson\nAllergies\nDefinition\nAllergies are abnormal reactions of the immune sys-')]


In [16]:
# Attach to the already populated Pinecone index (nothing is uploaded here)
# and repeat the retrieval smoke test against it.
docsearch = Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

query = "what are Allergies"
ans = docsearch.similarity_search(query=query, k=3)
print(ans)

[Document(page_content='by harmless, everyday substancessuch as pollen, dust, and animal danders. When thisoccurs, an allergy develops against the offending sub-stance (an allergen.)'), Document(page_content='Richard Robinson\nAllergies\nDefinition\nAllergies are abnormal reactions of the immune sys-\ntem that occur in response to otherwise harmless sub-stances.\nGALE ENCYCLOPEDIA OF MEDICINE 2 114AllergiesGEM - 0001 to 0432 - A  10/22/03 1:42 PM  Page 114'), Document(page_content='Mygund and R. M. Naclerio. Philadelphia: W. B. Saun-ders Co., 1993.\nLawlor, G. J. Jr., T. J. Fischer, and D. C. Adelman. Manual of\nAllergy and Immunology. Boston: Little, Brown and Co.,\n1995.\nNovick, N. L. You Can Do Something About Your Allergies.\nNew York: Macmillan, 1994.\nWeil, A. Natural Health, Natural Medicine: A Comprehensive\nManual for Wellness and Self-Care. New York: Houghton\nMifflin, 1995.\nRichard Robinson\nAllergies\nDefinition\nAllergies are abnormal reactions of the immune sys-')]


In [17]:
# Text of the question-answering prompt. `{context}` and `{question}` are the
# two placeholders filled in when the template is formatted.
prompt_template = (
    "\n"
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

In [18]:
# Wrap the raw template text in a PromptTemplate that expects the "context"
# and "question" variables, and package it in the keyword dictionary that is
# passed to the QA chain as `chain_type_kwargs`.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
# Local Llama 2 chat model loaded through the CTransformers wrapper.
# NOTE(review): the model path is absolute (rooted at /model) - confirm this
# is intended rather than a path relative to the notebook.
llm = CTransformers(
    model="/model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config=dict(max_new_tokens=512, temperature=0.8),
)

: 

In [None]:
# Retrieval-augmented QA chain: the retriever is asked for k=2 chunks, the
# "stuff" chain type combines them with the custom prompt, and the source
# documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs=dict(k=2)),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Interactive question/answer loop. Type "exit" or "quit" (or send EOF /
# interrupt the cell) to stop; blank lines are ignored so that an accidental
# Enter does not trigger a model call.
while True:
    try:
        user_input = input("Input prompt: ").strip()
    except (EOFError, KeyboardInterrupt):
        # No more input available or the user interrupted: leave cleanly.
        break
    if user_input.lower() in {"exit", "quit"}:
        break
    if not user_input:
        continue
    # `invoke` is the supported entry point; calling the chain object
    # directly is deprecated in current LangChain releases.
    result = qa.invoke({"query": user_input})
    print("Response:", result["result"])