In [1]:
from langchain import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA
from langchain_pinecone import PineconeVectorStore

  from tqdm.autonotebook import tqdm


In [2]:
# Load environment variables via python-dotenv before any cell reads
# os.environ (the Pinecone API key lookup in the next cell depends on this).
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
import os
import pinecone
# Note: this rebinds the name `Pinecone`, which the first cell imported from
# langchain.vectorstores; from here on it refers to the Pinecone client class.
from pinecone import Pinecone, ServerlessSpec

# Credentials are read from the environment instead of being hard-coded.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
# NOTE(review): the variable is called *_HOST but is read from
# PINECONE_API_ENV, and nothing in the visible cells uses it - confirm intent.
PINECONE_API_HOST = os.getenv('PINECONE_API_ENV')

# Client handle used by the index-related cells below.
pc = Pinecone(api_key=PINECONE_API_KEY)

In [4]:
# Index this notebook works against, plus the names of the indexes the
# client can already see (nothing is created in this cell).
index_name = "medical-chat-bot"  # change if desired
existing_indexes = [description["name"] for description in pc.list_indexes()]

In [12]:
##Configuring the pinecone index
# import time
# if index_name not in existing_indexes:
#     pc.create_index(
#         name=index_name,
#         dimension=384,
#         metric="cosine",
#         spec=ServerlessSpec(cloud="aws", region="us-east-1"),
#     )
#     while not pc.describe_index(index_name).status["ready"]:
#         time.sleep(1)

# index = pc.Index(index_name)

In [5]:
def load_pdf(data):
    """Load the PDF files found under a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``. Files are matched
            against the ``*.pdf`` glob and read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [6]:
# Read every PDF under the relative "data/" directory; the result is the
# input to text_split() in a later cell.
extracted_data = load_pdf("data/")

In [14]:
#extracted_data

In [7]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=2):
    """Split loaded documents into smaller chunks for embedding.

    Args:
        extracted_data: Documents to split, as returned by ``load_pdf``.
        chunk_size: Chunk size passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value this notebook previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks. Defaults to 2,
            the previously hard-coded value.
            NOTE(review): an overlap of 2 is very small relative to a
            500 chunk size - confirm it is intentional.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [8]:
# Chunk the loaded PDFs; these chunks are what gets embedded and uploaded
# to the Pinecone index in a later cell.
text_chunks = text_split(extracted_data)

In [5]:
# Embedding model id passed to download_hugging_face_embeddings() below.
# The repr shown further down reports word_embedding_dimension 384 for it.
model_name = "sentence-transformers/all-MiniLM-L6-v2"

In [6]:
def download_hugging_face_embeddings(model_name):
    """Build the embedding object for the given model.

    Args:
        model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [26]:
# Embedding object shared by the ingestion cell and the vector store cell.
embeddings = download_hugging_face_embeddings(model_name)

In [27]:
# Bare expression: shows the embeddings object's repr as the cell output.
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [33]:

# result = embeddings.embed_query("how are you")
# len(result)

In [36]:
# Populate the index only when it is empty. from_documents() embeds and
# upserts every chunk, so re-running this cell unguarded repeats the slow
# upload and upserts the same chunks again.
# Uses `pc`, `index_name` and `PineconeVectorStore` from the earlier cells.
# NOTE: if an earlier upload was interrupted partway, the index is non-empty
# and this guard skips ingestion - clear the index first to re-ingest.
index_stats = pc.Index(index_name).describe_index_stats()
if index_stats["total_vector_count"] == 0:
    docsearch = PineconeVectorStore.from_documents(text_chunks, embeddings, index_name=index_name)
else:
    # Index already holds vectors: attach to it instead of uploading again.
    docsearch = PineconeVectorStore(index_name=index_name, embedding=embeddings)

KeyboardInterrupt: 

In [28]:
# Vector store handle on the index named by index_name, built with the same
# embeddings object; the retriever and QA chain cells below query through it.
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)


In [29]:
# Retrieval check: fetch documents for a sample question with an MMR
# retriever and print each hit's text under a numbered heading.
query = "what are allergies"
retriever = vectorstore.as_retriever(search_type="mmr")
matched_docs = retriever.invoke(query)
for position, document in enumerate(matched_docs, start=1):
    print(f"\n## Document {position}\n")
    print(document.page_content)


## Document 1

American Academy of Ophthalmology. 655 Beach Street, PO
Box 7424, San Francisco, CA 94120-7424. <http://www.eyenet.org>.KEY TERMS
Allergen —A substance capable of inducing an
allergic response.
Allergic reaction —An immune system reaction to
a substance in the environment; symptomsinclude rash, inflammation, sneezing, itchy wateryeyes, and runny nose.
Conjunctiva —The mucous membrane that covers
the white part of the eyes and lines the eyelids.
Edema —A condition where tissues contain exces-

## Document 2

aller-gies and hypersensitivity to foods, chemicals, and otheragents. Other tests for food allergies are the eliminationand rotation diets, in which foods are systematically eval-uated to determine the ones that are causing problems.

## Document 3

causes much more severe symptoms and generally afever. Allergies to molds or pollens also can make the
nose run. Allergies are usually more persistent than thecommon cold. An allergist can do tests to determine ifthe cold

In [24]:
# Prompt text for the QA chain. {context} and {question} are the two
# placeholders declared as input_variables when PROMPT is built.
# Typos in the instruction text ("knwo", stray space before the period)
# are fixed because this string is sent to the LLM verbatim.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say you don't know, don't try to make up an answer

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else
Helpful answer:
"""

In [25]:
# Wrap the raw template text, declaring the two placeholders it contains.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
# Passed to RetrievalQA.from_chain_type as chain_type_kwargs.
chain_type_kwargs = dict(prompt=PROMPT)

In [37]:
# Local Llama model loaded from the model/ directory through CTransformers.
# NOTE: a later cell rebinds `llm` to ChatGoogleGenerativeAI, so when the
# notebook is run top to bottom this instance is replaced before `qa` is built.
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config={"max_new_tokens": 512, "temperature": 0.8},
)

In [41]:
# Rebinds `llm` to a Gemini chat model; this replaces the CTransformers
# instance assigned to `llm` in the previous cell.
# NOTE(review): presumably the Google API key is picked up from the
# environment loaded by load_dotenv() - confirm.
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")


In [None]:
# Smoke test: call the LLM directly (no retriever, no prompt template).
llm.invoke("what is attention in genai")

In [31]:
# Retrieval QA chain: retrieved documents are fed to `llm` through the
# custom PROMPT ("stuff" chain type), and the source documents are returned
# alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # The keyword is `search_kwargs` (plural). The original spelling
    # `search_kwarg` is not the name as_retriever() reads search settings
    # from, so the intended k=2 limit was not being passed as one.
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

In [42]:
# Run the QA chain on the sample question held in `query` (set in the
# retrieval check cell above).
invoked = qa.invoke(query)

In [43]:
# The 'result' entry of the chain output is the generated answer text.
print(invoked['result'])

An allergic reaction is an immune system reaction to a substance in the environment; symptoms include rash, inflammation, sneezing, itchy watery eyes, and runny nose. 



In [22]:
# Interactive question loop: type a question to query the QA chain, or
# 'exit' to stop. Blank (or whitespace-only) input is ignored.
while True:
    user_input = input('input Prompt: ').strip()
    if user_input == 'exit':
        print('Exiting....')
        # Leave the loop instead of calling sys.exit(): inside a notebook
        # sys.exit() raises SystemExit in the kernel rather than simply
        # ending the cell.
        break
    if not user_input:
        continue
    result = qa.invoke({'query': user_input})
    print(f"query: {result['query']} \nanswer: {result['result']}")