In [1]:
from langchain import PromptTemplate        
from langchain.chains import RetrievalQA                
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter      #for creating chunks
from langchain.prompts import PromptTemplate
from langchain_community.llms import CTransformers
from sentence_transformers import SentenceTransformer

from pinecone import Pinecone, ServerlessSpec
import os
from langchain_pinecone import PineconeVectorStore

  from tqdm.autonotebook import tqdm


In [2]:
# Pinecone client and index setup.
#
# The API key is read from the PINECONE_API_KEY environment variable rather
# than being hardcoded: a key written into a notebook is leaked to everyone who
# can read the file. Export it before starting Jupyter, e.g.
#   export PINECONE_API_KEY=...
# NOTE(review): the key that was previously committed in this cell should be
# revoked and rotated.
# The original cell also exported this same variable, presumably because the
# LangChain Pinecone vector store reads it -- confirm against its docs.
if not os.environ.get("PINECONE_API_KEY"):
    raise RuntimeError(
        "PINECONE_API_KEY is not set; export it before running this notebook."
    )

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

index_name = "medical-chatbot"

# Create the serverless index only if it does not exist yet, so re-running
# this cell is harmless.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the embedding length (384 is printed further down)
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [3]:
# Data extraction from pdf

def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``; files are selected
            with the glob pattern ``*.pdf`` and parsed with ``PyPDFLoader``.

    Returns:
        The documents returned by ``DirectoryLoader.load()``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [4]:
# Load the PDFs from the ../data/ directory (path is relative to the working directory).
extracted_data = load_pdf("../data/")

In [5]:
# extracted_data

In [6]:
# Create text chunks so that the entire PDF is split into smaller pieces of text

def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        extracted_data: Documents to split (as produced by ``load_pdf``).
        chunk_size: Maximum size of each chunk. Measured with the splitter's
            length function, which by default counts characters, not tokens.
        chunk_overlap: Amount shared between consecutive chunks, in the same
            unit as ``chunk_size``.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [7]:
# Split the loaded documents into chunks and report how many were produced.
text_chunks=text_split(extracted_data)
print("No of chunks formed:", len(text_chunks))

No of chunks formed: 10484


In [8]:
# text_chunks

In [9]:
# Convert the chunks into vectors

# Download the embedding model

def download_hugging_face_embedding():
    """Build the Hugging Face embedding wrapper used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

In [10]:
# Instantiate the embedding model once; later cells reuse it for the sample query and the vector store.
embeddings= download_hugging_face_embedding()

  return torch._C._cuda_getDeviceCount() > 0


In [11]:
# Display the embedding wrapper's repr to inspect the loaded model's configuration.
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [12]:
# Embed a sample string to confirm the vector length; it has to match the
# dimension (384) configured for the Pinecone index.
query_result = embeddings.embed_query("Hello World")
print("Length of embedding:", len(query_result))

Length of embedding: 384


In [13]:
# Cross-check: encode the same sentence directly with SentenceTransformer so the
# printed values can be compared with `query_result` from the LangChain wrapper.
sentences = ["Hello World"]

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
embeddings_test = model.encode(sentences)
print(embeddings_test)



[[-3.44772786e-02  3.10232472e-02  6.73494395e-03  2.61089504e-02
  -3.93620282e-02 -1.60302520e-01  6.69240132e-02 -6.44146046e-03
  -4.74504344e-02  1.47588439e-02  7.08752722e-02  5.55275828e-02
   1.91933177e-02 -2.62513068e-02 -1.01095820e-02 -2.69404948e-02
   2.23074220e-02 -2.22266205e-02 -1.49692625e-01 -1.74930543e-02
   7.67621724e-03  5.43522872e-02  3.25441500e-03  3.17259245e-02
  -8.46213996e-02 -2.94059850e-02  5.15956096e-02  4.81240042e-02
  -3.31481802e-03 -5.82791828e-02  4.19692621e-02  2.22106446e-02
   1.28188819e-01 -2.23389063e-02 -1.16563132e-02  6.29283562e-02
  -3.28763314e-02 -9.12260562e-02 -3.11753340e-02  5.26995584e-02
   4.70348336e-02 -8.42030644e-02 -3.00561897e-02 -2.07448583e-02
   9.51783918e-03 -3.72184161e-03  7.34333321e-03  3.93242836e-02
   9.32740122e-02 -3.78858577e-03 -5.27421311e-02 -5.80582172e-02
  -6.86441176e-03  5.28323185e-03  8.28930065e-02  1.93627737e-02
   6.28452981e-03 -1.03307664e-02  9.03239660e-03 -3.76837738e-02
  -4.52059

In [14]:
query_result   # Vector representation of "Hello World" (the 384-element list returned by embed_query above)

[-0.03447727859020233,
 0.03102324716746807,
 0.006734943948686123,
 0.02610895037651062,
 -0.03936202824115753,
 -0.1603025197982788,
 0.06692401319742203,
 -0.006441460456699133,
 -0.047450434416532516,
 0.014758843928575516,
 0.07087527215480804,
 0.05552758276462555,
 0.019193317741155624,
 -0.026251306757330894,
 -0.010109581984579563,
 -0.026940494775772095,
 0.022307422012090683,
 -0.022226620465517044,
 -0.1496926248073578,
 -0.017493054270744324,
 0.0076762172393500805,
 0.0543522872030735,
 0.00325441500172019,
 0.03172592446208,
 -0.08462139964103699,
 -0.02940598502755165,
 0.05159560963511467,
 0.048124004155397415,
 -0.00331481802277267,
 -0.05827918276190758,
 0.041969262063503265,
 0.02221064455807209,
 0.128188818693161,
 -0.022338906303048134,
 -0.011656313203275204,
 0.06292835623025894,
 -0.032876331359148026,
 -0.09122605621814728,
 -0.031175334006547928,
 0.052699558436870575,
 0.04703483358025551,
 -0.08420306444168091,
 -0.030056189745664597,
 -0.020744858309626

In [15]:

# index = pc.list_indexes().names()

# # Function to check if vectors exist in the index
# def vectors_exist(index_name, vector_ids):
#     try:
#         # Fetch the existing vectors by their IDs
#         existing_vectors = index_name.fetch(ids=vector_ids)
#         return len(existing_vectors["vectors"]) > 0
#     except Exception as e:
#         print(f"Error checking vectors: {e}")
#         return False

# if index_name not in pc.list_indexes().names():
#     vectorstore_from_docs = PineconeVectorStore.from_documents(
#         text_chunks,
#         index_name=index_name,
#         embedding=embeddings
#     )

In [16]:
# made_index=pc.describe_index("medical-chatbot")
# len(made_index)

In [17]:
# Converting the chunks to vectors and uploading them to the Pinecone index

# vectorstore_from_docs = PineconeVectorStore.from_documents(
#         text_chunks,
#         index_name=index_name,
#         embedding=embeddings
#     )

In [18]:
# Connect LangChain to the Pinecone index.
# If the index holds no vectors yet (for example it was only just created),
# embed and upload the chunks first so the notebook also works on a fresh run.
# Otherwise reuse the stored vectors and skip the slow re-upload.
index_name = "medical-chatbot"

if pc.Index(index_name).describe_index_stats().total_vector_count == 0:
    vectorstore_from_docs = PineconeVectorStore.from_documents(
        text_chunks,
        index_name=index_name,
        embedding=embeddings,
    )
else:
    vectorstore_from_docs = PineconeVectorStore.from_existing_index(index_name, embeddings)

In [19]:
# Sanity-check retrieval: fetch the 3 stored chunks most similar to the query
# (the index was created with the cosine metric) and print the results.

query= "What are Allergies?"
answer=vectorstore_from_docs.similarity_search(query, k=3)
print("Result obtained:",answer)

Result obtained: [Document(page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 2591Physical allergy', metadata={'page': 297.0, 'source': 'data/Medical-Book.pdf'}), Document(page_content='KEY TERMS\nAllergen —Any substance that irritates only those\nwho are sensitive (allergic) to it.\nAsthma —Wheezing (labored breathing) due to\nallergies or irritation of the lungs.\nDecongestant —Medicines that shrink blood ves-\nsels and consequently mucus membranes. Pseu-doephedrine, phenylephrine, and phenylpropano-lamine are the most common.\nSinus —Air-filled cavities surrounding the eyes and', metadata={'page': 22.0, 'source': 'data/Medical-Book.pdf'}), Document(page_content='allergic or bacterial rhinitis and sinusitis. An over-the-counter antihistamine such as Actifed may provide relief.\nIf allergies cause anosmia, adjustments should be\nmade to avoid allergens. If dust causes allergies, careshould be taken to clean areas such as the bedroom.\nAntibiotics may be prescribed for infections.\nOther me

In [20]:
# Retrieval returns several candidate passages; the LLM is used to turn them
# into a single answer to the user's question.

# Prompt text for the LLM. {context} and {question} are the two placeholders
# that get filled in when the template is rendered.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [21]:

# Wrap the template string in a PromptTemplate with its two input variables,
# then put it in the kwargs dict that is passed to the QA chain further down.
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [22]:
# Load the local Llama-2 chat model file through CTransformers.
# A load failure is reported and then re-raised: swallowing it would leave
# `llm` undefined and only surface later as a confusing NameError when the
# QA chain is built.
try:
    llm = CTransformers(
        model='../model/llama-2-7b-chat.ggmlv3.q2_K.bin',
        model_type="llama",
    )
except Exception as e:
    print(f"Failed to load the model: {e}")
    raise
else:
    print("Model loaded successfully.")



Model loaded successfully.


In [23]:
# Build the retrieval QA chain: the retriever pulls the 2 closest chunks from
# the vector store and the "stuff" chain type hands them to the LLM together
# with the custom prompt. Source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore_from_docs.as_retriever(search_kwargs={"k": 2}),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [24]:
# Interactive question/answer loop; type "exit" or "quit" to stop.
while True:
    # Strip surrounding whitespace so that e.g. "exit " still ends the loop.
    user_input = input("Input Prompt: ").strip()
    if not user_input:
        # Ignore blank lines instead of sending an empty query to the chain.
        continue
    if user_input.lower() in ["exit", "quit"]:
        print("Exiting the loop.")
        break
    # invoke() is the supported entry point; calling the chain object directly
    # (qa({...})) is deprecated in current LangChain releases.
    result = qa.invoke({"query": user_input})
    print("Response: ", result["result"])

Exiting the loop.
