In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

  from tqdm.autonotebook import tqdm


In [3]:
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    
    documents = loader.load()

    return documents

In [4]:
ext_data = load_pdf("Data/")

In [5]:
def text_split(ext_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)
    text_chunks = text_splitter.split_documents(ext_data)

    return text_chunks

In [6]:
text_chunks = text_split(ext_data)

In [7]:
print(len(text_chunks))

7020


In [8]:
# #download embedding model
# def download_hugging_face_embeddings():
#     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
#     return embeddings
# embeddings = download_hugging_face_embeddings()

In [9]:
from dotenv import load_dotenv
load_dotenv()
import os

In [10]:
PINECONE1_API_KEY = os.environ.get('PINECONE1_API_KEY')


In [11]:
from pinecone import Pinecone

pc = Pinecone(api_key=PINECONE1_API_KEY)
index = pc.Index("medicalchatbot")

In [12]:
from sentence_transformers import SentenceTransformer

# Function to download the Hugging Face embedding model
def download_hugging_face_embeddings():
    embeddings = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

# Load the Hugging Face embedding model
model = download_hugging_face_embeddings()

In [13]:


# ### Upserting data with meta data
# # Extract the data from text chunks

# cleaned_data = [t.page_content for t in text_chunks][2000:3000]

# # Generate embeddings for each text chunk
# embeddings = model.encode(cleaned_data)

# # Formatting for upserting

# entries = []
# for i, embedding in enumerate(embeddings):
#     entry = {
#         "id": f"text-{i+3000}",  # Replace with your ID format
#         "values": embedding.tolist(),
#         "metadata": {"context":cleaned_data[i]}  # Assuming metadata is provided in the same order as embeddings
#     }
#     entries.append(entry)

# index.upsert(vectors=entries)

 Querying

In [14]:
query_text = "Tell me about JaThe Doppler effect"


In [15]:
import torch
import numpy as np

# Assuming you have already embedded and stored embeddings for this query text
# Generate the embedding for the query text (you would typically do this in the embedding step)
query_embedding = model.encode([query_text])[0]

# Ensure query embedding is a list of floats
if isinstance(query_embedding, np.ndarray):
    query_embedding = query_embedding.tolist()
elif isinstance(query_embedding, torch.Tensor):
    query_embedding = query_embedding.tolist()
else:
    raise ValueError(f"Unexpected embedding type: {type(query_embedding)}")



In [16]:
# Search for similar embeddings
results = index.query(vector=query_embedding, top_k=10,include_metadata=True)



In [17]:
final_result = [result['metadata']['context'] for result in results['matches']]
context = ((" ".join(final_result)).replace('\n','')).strip()
context


'Doppler —The Doppler effect refers to the apparentchange in frequency of sound wave echoes returningto a stationary source from a moving target. If theobject is moving toward the source, the frequencyincreases; if the object is moving away, the frequen-cy decreases. The size of this frequency shift can beused to compute the object’s speed—be it a car onthe road or blood in an artery. The Doppler effectholds true for all types of radiation, not just sound. capability of accurately measuring velocities of movingmaterial, such as blood in arteries and veins. The prin-ciple is the same as that used in radar guns that measurethe speed of a car on the highway. Doppler capability ismost often combined with B-mode scanning to produceimages of blood vessels from which blood flow can bedirectly measured. This technique is used extensively toinvestigate valve defects, arteriosclerosis and hyper-tension , particularly in the heart, but also in the abdom- ing tissue by disrupting and destroying in

In [24]:
# # Assuming you have loaded your model as
# from langchain.llms import CTransformers

# llm = CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
#                     model_type="llama",
#                     config={'max_new_tokens': 512,
#                             'temperature': 0.8})

# # Generate response
# response = llm(prompt)

# print(response)
# from langchain.llms import CTransformers
# from langchain.prompts import PromptTemplate

In [18]:
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate

prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}

Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": PROMPT}

# Load the model
llm = CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                    model_type="llama",
                    config={'max_new_tokens': 512,
                            'temperature': 0.3})


# Define the user's question
question = "What is dopler effect tell me in 20 words "

# Format the prompt
formatted_prompt = PROMPT.format(context=context, question=question)



In [19]:
print(formatted_prompt)


Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: Doppler —The Doppler effect refers to the apparentchange in frequency of sound wave echoes returningto a stationary source from a moving target. If theobject is moving toward the source, the frequencyincreases; if the object is moving away, the frequen-cy decreases. The size of this frequency shift can beused to compute the object’s speed—be it a car onthe road or blood in an artery. The Doppler effectholds true for all types of radiation, not just sound. capability of accurately measuring velocities of movingmaterial, such as blood in arteries and veins. The prin-ciple is the same as that used in radar guns that measurethe speed of a car on the highway. Doppler capability ismost often combined with B-mode scanning to produceimages of blood vessels from which blood flow can bedirectly measured. This technique is us

In [20]:
# Generate response
response = llm(formatted_prompt)

print(response)

The Dopplications such as a)
The Dopproentire,
The Dopplication.
The Dopp.
The Dopplication of blood flowing a)
The Dopplication.
The Dopplication,
The Dopp.
The Dopp.
The Doppppl
The Dopprojecteducation, The Dopplications,
The Dopplication.
The Dopp 
The Dopppppplication of blood flowing the Doppppppppppls
The Dopplications,
The Dopppppp.
The Dopplication of course,
The Dopplications,
The Dopppprobotan Alexander technique is a)
The Dopp.
The Dopp
The Dopppppplications,
The Dopplication.
The Dopp.
The Dopppppppppplication,
The Dopppplications of course,
The Dopp
The Dopplication or mechanically injecteducation and hyper- The Dopprojecting the Dopplication.
The Dopp.
The Dopppppppppplication,
The Dopplication,
The Dopplications,
The Dopp.
The Dopplication of blood pressure,
The Dopp.
The Dopp.
The Dopppppplication.
The Dopppprobotan increase in the Dopp.
The Dopp.
The Dopppppp.
The Dopplication. The Dopplications,
The Dopp.
The Dopplication of course,
The Dopplications as pertainting a)