In [None]:
print("OK!")

OK!


In [7]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyMuPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
import os
from langchain.chains import RetrievalQA

In [None]:
PINECONE_API_KEY = ""


In [4]:
def load_pdf(data):
    loader = DirectoryLoader(data, 
                             glob="*.pdf",
                             loader_cls=PyMuPDFLoader)
    documents = loader.load()
    return documents

In [65]:
extracted_data= load_pdf("data/")

In [46]:
#extracted_data

In [66]:
#Create text chunks
def text_split(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

In [67]:
text_chunks = text_split(extracted_data)
print(f"Number of text chunks: {len(text_chunks)}")

Number of text chunks: 6972


In [68]:
#download the embeddings model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [69]:
embeddings= download_hugging_face_embeddings()

In [70]:
embeddings

HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [71]:
query_results = embeddings.embed_query("Hello world")
print("length", len(query_results))

length 384


In [53]:
#query_results

In [None]:
from pinecone import Pinecone as PineconeClient, ServerlessSpec
from langchain_pinecone import Pinecone
from langchain_huggingface import HuggingFaceEmbeddings

# --- Pinecone Configuration ---
PINECONE_API_KEY = ""

PINECONE_CLOUD = "aws"
PINECONE_REGION = "us-east-2"

# --- Hugging Face Embeddings Setup ---
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

embedding_dimension = 384
# Initialize the Pinecone client
pc = PineconeClient(api_key=PINECONE_API_KEY)

# Define index name
index_name = "medical-chatbot1"

# Check if index exists and create if not
if index_name not in pc.list_indexes().names():
    print(f"Creating Pinecone index: {index_name} with dimension {embedding_dimension}")
    pc.create_index(
        name=index_name,
        dimension=embedding_dimension,
        metric='cosine',
        # Use ServerlessSpec directly
        spec=ServerlessSpec(cloud=PINECONE_CLOUD, region=PINECONE_REGION)
    )
    print("Index created. Waiting for index to be ready...")
    import time
    time.sleep(60)

# LangChain's Pinecone class to add texts
pinecone_vectorstore = Pinecone(
    index_name=index_name,
    embedding=embeddings,
    pinecone_api_key=PINECONE_API_KEY,
)

# Creating Embeddings for each of the Text Chunks and storing it
docsearch = pinecone_vectorstore.add_texts([t.page_content for t in text_chunks])

print("Embeddings created and added to Pinecone index.")

Embeddings created and added to Pinecone index.


In [None]:
from langchain_pinecone import Pinecone
from langchain_huggingface import HuggingFaceEmbeddings
import os 

PINECONE_API_KEY = ""
PINECONE_CLOUD = "aws"
PINECONE_REGION = "us-east-2"
index_name = "medical-chatbot1"

os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

# Embedding Model Setup (for query embedding)
hf_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=hf_model_name)

# Loading an existing index
docsearch = Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

query = "What are allergies"

docs = docsearch.similarity_search(query, k=3)

print("Result", docs)

Result [Document(id='81c0abdb-bf91-4c10-84b2-d634537072f5', metadata={}, page_content='ORGANIZATIONS\nAmerican Academy of Ophthalmology. 655 Beach Street, PO\nBox 7424, San Francisco, CA 94120-7424. <http://www.\neyenet.org>.\nKEY TERMS\nAllergen—A substance capable of inducing an\nallergic response.\nAllergic reaction—An immune system reaction to\na substance in the environment; symptoms\ninclude rash, inflammation, sneezing, itchy watery\neyes, and runny nose.\nConjunctiva—The mucous membrane that covers\nthe white part of the eyes and lines the eyelids.'), Document(id='9ebb534d-12ea-45a7-8a31-59d292fcbce1', metadata={}, page_content='Although environmental medicine is gaining more\nrespect within conventional medicine, detoxification\nKEY TERMS\nAllergen—A foreign substance, such as mites in\nhouse dust or animal dander, that when\ninhaled,causes the airways to narrow and pro-\nduces symptoms of asthma.\nAntibody—A protein, also called immunoglobu-\nlin, produced by immune system ce

In [11]:
prompt_template="""
Use the following pieces of information to answer the question at the user's question.
If you don't know the answer, say so, don't try to make something up.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [12]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [13]:
llm=CTransformers(model="Model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={
                      'max_new_tokens':512,
                      'temperature':0.8
                      }
                      )




In [14]:
qa=RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k':2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [15]:
while True:
    user_input = input(f"Input Prompt:")
    result = qa.invoke({"query": user_input})
    print("Response: ", result["result"])

Response:  Acne is a skin condition characterized by inflammation of the oil glands in the skin, resulting in redness, pimples, and/or nodules on the face and body.


KeyboardInterrupt: 