In [1]:
from langchain import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA
from langchain_pinecone import PineconeVectorStore

  from tqdm.autonotebook import tqdm


In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
import os
import pinecone
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
PINECONE_API_HOST = os.environ.get('PINECONE_API_ENV')

from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(api_key=PINECONE_API_KEY)

In [4]:
##Creating the Index in Pinecone
index_name = "medical-chat-bot"  # change if desired
existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

In [6]:
##Configuring the pinecone index
# import time
# if index_name not in existing_indexes:
#     pc.create_index(
#         name=index_name,
#         dimension=384,
#         metric="cosine",
#         spec=ServerlessSpec(cloud="aws", region="us-east-1"),
#     )
#     while not pc.describe_index(index_name).status["ready"]:
#         time.sleep(1)

# index = pc.Index(index_name)

In [5]:
def load_pdf(data):
    loader = DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader
    )

    documents = loader.load()

    return documents

In [8]:
extracted_data = load_pdf("data/")

In [9]:
#extracted_data

In [10]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 2
    )
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [11]:
text_chunks = text_split(extracted_data)

In [6]:
model_name = "sentence-transformers/all-MiniLM-L6-v2"

In [7]:
#download embedding model
def download_hugging_face_embeddings(model_name):
    embeddings = HuggingFaceEmbeddings(model_name = model_name)
    return embeddings

In [8]:
embeddings = download_hugging_face_embeddings(model_name)

  warn_deprecated(


In [9]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [16]:

# result = embeddings.embed_query("how are you")
# len(result)

In [17]:
# from langchain_pinecone import PineconeVectorStore
# index_name = "medical-chat-bot"
# docsearch = PineconeVectorStore.from_documents(text_chunks, embeddings, index_name=index_name)

In [10]:
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)


In [11]:
query = "what are allergies"
retriever = vectorstore.as_retriever(search_type="mmr")
matched_docs = retriever.invoke(query)
for i, d in enumerate(matched_docs):
    print(f"\n## Document {i+1}\n")
    print(d.page_content)


## Document 1

American Academy of Ophthalmology. 655 Beach Street, PO
Box 7424, San Francisco, CA 94120-7424. <http://www.eyenet.org>.KEY TERMS
Allergen —A substance capable of inducing an
allergic response.
Allergic reaction —An immune system reaction to
a substance in the environment; symptomsinclude rash, inflammation, sneezing, itchy wateryeyes, and runny nose.
Conjunctiva —The mucous membrane that covers
the white part of the eyes and lines the eyelids.
Edema —A condition where tissues contain exces-

## Document 2

aller-gies and hypersensitivity to foods, chemicals, and otheragents. Other tests for food allergies are the eliminationand rotation diets, in which foods are systematically eval-uated to determine the ones that are causing problems.

## Document 3

causes much more severe symptoms and generally afever. Allergies to molds or pollens also can make the
nose run. Allergies are usually more persistent than thecommon cold. An allergist can do tests to determine ifthe cold

In [13]:
prompt_template = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

Use the following pieces of information to answer the user's question. If you don't know the answer, just say you don't know, don't try to make up an answer.

Context: {context}
"""

In [14]:
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": PROMPT}

In [16]:
llm = CTransformers(model=r"../model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                    model_type="llama",
                    config={"max_new_tokens" : 512,
                            'temperature': 0.2}
                    )

In [35]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")


In [24]:
llm.invoke("what is attention in genai")

AIMessage(content='## Attention in Generative AI: Focusing on What Matters\n\nIn the world of Generative AI, "attention" is a powerful mechanism that mimics how humans focus on specific parts of information when processing it. Just like we pay more attention to certain words in a sentence to understand its meaning, attention mechanisms in AI models help them focus on the most relevant parts of the input data to generate better outputs.\n\nHere\'s a breakdown:\n\n**Imagine you\'re translating a sentence:** "The cat sat on the mat, which was blue."\n\n* **Without attention:** A basic model might process the entire sentence equally, potentially getting confused by the "which was blue" clause when translating "the cat."\n* **With attention:** The model would learn to pay more "attention" to the words directly related to "the cat" (like "sat" and "mat"), effectively ignoring the less relevant parts for that translation step.\n\n**How does it work?**\n\nAttention mechanisms utilize a system 

In [17]:
qa = RetrievalQA.from_chain_type(llm = llm,
                                 chain_type="stuff",
                                 retriever = vectorstore.as_retriever(search_kwarg = {'k': 2}),
                                 return_source_documents = True,
                                 chain_type_kwargs= chain_type_kwargs
                                 )

In [18]:
invoked = qa.invoke(query)

In [19]:
print(invoked['result'])

ment' ment, such as a condition include patches that can be used to determine whether or environmental medicine.
ment' ment.
ment.
ment.
ment, including the immunease of the immunitself-ment for food allergications.
ment.
ment' ment, ment, and chemicalsugly include skin prenhistment as a condition include patches that may be used to determine whether or environmental medicine.
ment'
ment' ment.
ment in the medical establishments.
ment.
ment, such as a condition.
ment.
ment' ment, ment.
ment' ment, ment, ment and environmental allergications, such as well-ment' ment.
ment' ment,
ment.
ment.
ment,
ment for the immunease of the medical establishments in San Francisco, including blood tests for food allergy may include patches a condition are available to determine whether or other symptoms, ment and environmental medicine, such as an allergications.
ment'
ment.
ment as well-ment' ment, ment, ment, ment, ment.
ment.
ment.
ment.
ment.
ment.
ment for the immunease of the medical condition in

In [28]:
import sys
while True:
    user_input = input(f'input Prompt: ')
    if user_input == 'exit':
        print('Exiting....')
        sys.exit()
    if user_input == '':
        continue
    result = qa.invoke({'query': user_input})
    print(f"query: {result['query']} \nanswer: {result['result']}")

query: what are acne 
answer: I'm sorry, but the provided context does not contain information about acne. 



Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


query: what are acne 
answer: The provided text does not contain the answer to what acne is. 

Exiting....


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
