In [1]:
# Prints a fixed greeting; presumably a quick check that the kernel runs.
print("Hello, world!")

Hello, world!


In [17]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import Pinecone


In [3]:
import os
def load_pdf(data):
    """Load every PDF file located directly inside a directory.

    Args:
        data: Path of the directory to scan. Sub-directories are not
            descended into.

    Returns:
        A flat list containing everything each ``PyPDFLoader(path).load()``
        call returned, concatenated in file-name order.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``data`` cannot be listed.
    """
    documents = []
    # os.listdir yields entries in arbitrary order; sorting keeps the document
    # (and therefore chunk) order identical from one run to the next.
    for file in sorted(os.listdir(data)):
        path = os.path.join(data, file)
        # Match the extension case-insensitively so "REPORT.PDF" is picked up,
        # and skip directories whose name merely ends in ".pdf".
        if file.lower().endswith(".pdf") and os.path.isfile(path):
            documents.extend(PyPDFLoader(path).load())
    return documents

In [10]:
# Directory holding the source PDFs. It can be overridden through the
# MEDICAL_CHATBOT_DATA_DIR environment variable so the notebook is not tied to
# one machine; the previous hard-coded location remains the default.
DATA_DIR = os.environ.get("MEDICAL_CHATBOT_DATA_DIR", "/Users/mac/Medical-ChatBot/data")
extracted_data = load_pdf(DATA_DIR)

In [11]:
# Create text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 500, the value that used to be hard-coded.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        Whatever ``split_documents`` returns for ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [12]:
# Chunk the loaded documents, then report how many chunks came out.
text_chunks = text_split(extracted_data)
print(f"length of my chunk: {len(text_chunks)}")

length of my chunk: 5859


In [13]:
# Build the embedding model
def download_hugging_face_embeddings():
    """Return a ``HuggingFaceEmbeddings`` for ``sentence-transformers/all-MiniLM-L6-v2``."""
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )

In [14]:
# Create the embedding model once; the vector-store cell below reuses this object.
embeddings = download_hugging_face_embeddings()

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Display the object's repr to inspect the settings it was created with.
embeddings

HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [19]:
from dotenv import load_dotenv
import os

# Let python-dotenv populate the process environment, then read the Pinecone
# key from it (the value is None when the variable is not set).
load_dotenv()
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")


In [16]:
from langchain_pinecone import PineconeVectorStore

# Name of the Pinecone index the chunks are written to; the search cell reuses it.
index_name = "medical-chatbot"
# Embed the chunks with `embeddings` and upsert them through LangChain's
# PineconeVectorStore.
# NOTE(review): from_documents runs on every execution of this cell, so
# re-running it presumably inserts the same chunks again - confirm, and
# consider connecting to the existing index instead once it is populated.
vectorstore = PineconeVectorStore.from_documents(
    documents=text_chunks,   # the LangChain Documents are passed in directly
    embedding=embeddings,
    index_name=index_name,
    namespace="example-namespace",   # optional
)

print("âœ… Documents successfully upserted into Pinecone!")

âœ… Documents successfully upserted into Pinecone!


In [18]:
# Define the query
query = "What is Allergies"
# Initialize Pinecone
pc = Pinecone()
dense_index = pc.Index(index_name)

# Search the index with raw query text and ask for the 3 best matches.
# NOTE(review): this call sends text and leaves the embedding to the index,
# while the vectors in this namespace were written through the LangChain store
# using `embeddings`. Confirm both sides use the same embedding model;
# otherwise the scores below are not meaningful.
results = dense_index.search(
    namespace="example-namespace",
    query={
        "top_k": 3,
        "inputs": {
            'text': query
        }
    }
)

# Print one record per hit: id, score, page and a short text preview.
for hit in results.to_dict()["result"]["hits"]:
    _id = hit.get("_id", "")
    # A hit without a score is reported as "n/a". The previous fallback of 3
    # printed an invented value that looked like a real score.
    raw_score = hit.get("_score")
    score = round(raw_score, 3) if raw_score is not None else "n/a"
    # `or` also covers keys that are present but hold None.
    fields = hit.get("fields") or {}

    # Grab the fields you care about
    page = fields.get("page", "")
    page_label = fields.get("page_label", "")
    text = (fields.get("text") or "").replace("\n", " ")[:300]  # truncate to 300 chars

    print(f"ID: {_id}")
    print(f"Score: {score}")
    print(f"Page: {page_label or page}")
    print(f"Text: {text}")
    print("-" * 100)




ID: fe23b8d0-7059-4710-8f62-cac1ad5af890
Score: 0.156
Page: 37
Text: Achromatopsia see Color blindness Acid indigestion see Heartburn Acid phosphatase test Definition Acid phosphatase is an enzyme found throughout the body, but primarily in the prostate gland. Like all enzymes, it is needed to trigger specific chemical reac- tions. Acid phosphatase testing is done to
----------------------------------------------------------------------------------------------------
ID: 37403d58-2429-433a-84a1-159172fd892d
Score: 0.15
Page: 348
Text: sia, and mixed transcortical aphasia. All of the transcor- tical aphasias are distinguished from other types by the individualâ€™s ability to repeat words, phrases, or sen- tences. Other language functions may also be impaired to varying degrees, depending on the extent and partic- ular location of br
----------------------------------------------------------------------------------------------------
ID: 1206af05-7b8b-4f54-b4e7-975ca02bf237
Score: 0.145
Pa

In [20]:
# Prompt text for the QA chain. {context} and {question} are the two
# placeholders declared as input variables when the PromptTemplate is built.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [21]:
# Wrap the raw prompt text in a PromptTemplate and package it the way
# RetrievalQA.from_chain_type expects it.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [26]:
# OpenAI key taken from the environment; None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
from langchain_openai import ChatOpenAI

# Chat model handed to the QA chain; temperature is fixed at 0.
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model="gpt-3.5-turbo",
    temperature=0,
)

In [49]:
# Retrieval-augmented QA chain: the retriever requests k=2 chunks, which the
# "stuff" chain type places into the custom prompt before calling the LLM.
# Source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    chain_type="stuff",
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Interactive question loop. Type "exit" or "quit" (or interrupt / send EOF at
# the prompt) to stop; previously the only way out was to kill the kernel.
while True:
    try:
        user_input = input("Input Prompt:")
    except (EOFError, KeyboardInterrupt):
        break

    command = user_input.strip().lower()
    if command in {"exit", "quit"}:
        break
    if not command:
        # Nothing was typed; ask again instead of sending an empty query.
        continue

    # Chain.__call__ is deprecated in LangChain; invoke() is the supported entry point.
    result = qa.invoke({"query": user_input})
    print("Response : ", result["result"])