In [None]:
from langchain import PromptTemplate
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from pinecone import Pinecone, ServerlessSpec
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
import os
from tqdm.autonotebook import tqdm
import sys
sys.path.append('../../')

In [None]:
# Absolute path of the parent of the current working directory; used as the
# base for locating project folders such as the data directory.
ROOT_DIR = os.path.abspath(os.pardir)

In [None]:
# Name of the Pinecone index that stores and serves the document embeddings.
PINECONE_INDEX_NAME: str = 'medical-chatbot'

In [None]:
# Provide API credentials through the process environment.
# `setdefault` only fills in a value when the variable is missing, so real
# keys already exported in the shell (or loaded from a .env file) are not
# overwritten by the placeholder literals below.
# NOTE: do not commit real keys here; prefer exporting them outside the notebook.
os.environ.setdefault('PINECONE_API_KEY', "YOUR_PINECONE_API_KEY")
os.environ.setdefault('GROQ_API_KEY', "YOUR_GROQ_API_KEY")

In [None]:
def load_data(data_path):
    """Load the PDF files found in ``data_path``.

    Files matching the ``*.pdf`` glob are read with ``PyPDFLoader`` through a
    ``DirectoryLoader``.

    Args:
        data_path: Directory that contains the PDF files.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files.
    """
    pdf_loader = DirectoryLoader(
        data_path,
        glob='*.pdf',
        loader_cls=PyPDFLoader,
    )
    return pdf_loader.load()

In [None]:
# The source PDFs are read from the "data" folder directly under ROOT_DIR.
data_path = os.path.join(ROOT_DIR, 'data')
# Load every matching PDF; `data` is the input to the chunking step.
data = load_data(data_path)

In [None]:
def text_split(data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks for embedding.

    Args:
        data: Documents to split, as produced by ``load_data``.
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that was previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 20, the value
            that was previously hard-coded.

    Returns:
        The chunks returned by ``splitter.split_documents(data)``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(data)

In [None]:
# Chunk the loaded documents and print how many chunks were produced.
text_chunks = text_split(data)
print(len(text_chunks))

In [None]:
def download_huggingface_embedding():
    """Create the embedding model used for both indexing and querying.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
    )

In [None]:
# Build the embedding model once; the same object is used to index the
# chunks and to embed queries against the Pinecone index.
embeddings = download_huggingface_embedding()

In [None]:
# `PineconeVectorStore.from_documents` writes into an index that must already
# exist, so create it first when it is missing.
pinecone_client = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))
if PINECONE_INDEX_NAME not in pinecone_client.list_indexes().names():
    pinecone_client.create_index(
        name=PINECONE_INDEX_NAME,
        # Take the vector size from the embedding model itself so the index
        # always matches the embeddings that are about to be stored.
        dimension=len(embeddings.embed_query("dimension probe")),
        metric='cosine',
        # NOTE(review): cloud/region are an assumption (AWS us-east-1);
        # adjust to the project's Pinecone plan before first run.
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),
    )

# Embed every chunk and store the vectors in the index.
# NOTE(review): re-running this cell uploads the chunks again — confirm
# whether that produces duplicate vectors for your setup.
vectorstore_from_docs = PineconeVectorStore.from_documents(
    text_chunks,
    index_name=PINECONE_INDEX_NAME,
    embedding=embeddings
)

In [None]:
# Open a vector store over the existing index named PINECONE_INDEX_NAME,
# using the same embedding model so queries can be embedded for search.
docsearch = PineconeVectorStore.from_existing_index(PINECONE_INDEX_NAME, embeddings)

In [None]:
# Quick retrieval check: request the 3 most similar stored chunks for a
# sample question and print them.
query = "What are Allergies?"
docs = docsearch.similarity_search(query, k=3)
print(docs)

In [None]:
# Prompt text for the QA chain. `{context}` and `{question}` are the two
# placeholders filled in when the template is rendered.
prompt_template = (
    "\n"
    "Use the given information context to give appropriate answer for the user's question.\n"
    "If you don't know the answer, just say that you don't know the answer, but don't make up an answer.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Only return the appropriate answer and nothing else.\n"
    "Helpful answer:\n"
)

In [None]:
# Wrap the raw template text, declaring the two placeholders it contains.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
# Extra keyword arguments handed to the chain constructor so it uses the
# custom prompt.
chain_type_kwargs = dict(prompt=prompt)

In [None]:
# Groq-hosted chat model that generates the final answers; the API key is
# read from the GROQ_API_KEY environment variable.
llm = ChatGroq(
    api_key=os.getenv("GROQ_API_KEY"),
    model="llama-3.1-8b-instant",
    max_tokens=512,
    temperature=0.7,
)

In [None]:
# Retrieval-augmented QA chain: the retriever is configured with k=2, the
# 'stuff' chain type is used with the custom prompt, and the source
# documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs=dict(k=2)),
    chain_type='stuff',
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Ask a sample question and show the answer plus a 200-character preview of
# each source document the chain returned.
result = qa.invoke("What is Acne?")
print("Answer:", result['result'])
print("\nSource Documents:")
for position, source_doc in enumerate(result['source_documents'], start=1):
    preview = source_doc.page_content[:200]
    print(f"Document {position}: {preview}...")

In [None]:
# Ask a sample question and show the answer plus a 200-character preview of
# each source document the chain returned.
result = qa.invoke("What are the symptoms of diabetes?")
print("Answer:", result['result'])
print("\nSource Documents:")
for position, source_doc in enumerate(result['source_documents'], start=1):
    preview = source_doc.page_content[:200]
    print(f"Document {position}: {preview}...")

In [None]:
# Ask a sample question and show the answer plus a 200-character preview of
# each source document the chain returned.
result = qa.invoke("How to treat high blood pressure?")
print("Answer:", result['result'])
print("\nSource Documents:")
for position, source_doc in enumerate(result['source_documents'], start=1):
    preview = source_doc.page_content[:200]
    print(f"Document {position}: {preview}...")

In [None]:
def ask_medical_question(question):
    """Run ``question`` through the module-level ``qa`` chain.

    Args:
        question: The question text passed to ``qa.invoke``.

    Returns:
        A dict with two keys:
        ``'answer'`` holds the chain's ``'result'`` value, and
        ``'sources'`` holds one string per returned source document,
        made of the first 200 characters of its ``page_content``
        followed by ``'...'``.
    """
    chain_output = qa.invoke(question)
    previews = [
        source_doc.page_content[:200] + '...'
        for source_doc in chain_output['source_documents']
    ]
    return {'answer': chain_output['result'], 'sources': previews}

In [None]:
# Sample questions to run through the chain in one batch.
questions = [
    "What causes heart disease?",
    "What are the side effects of aspirin?",
    "How to prevent cancer?",
    "What is the treatment for asthma?",
]

# For each question print the answer, a numbered list of source previews,
# and an 80-character divider line.
for current_question in questions:
    print(f"\nQuestion: {current_question}")
    response = ask_medical_question(current_question)
    print(f"Answer: {response['answer']}")
    print("Sources:")
    for number, source in enumerate(response['sources'], start=1):
        print(f"  {number}. {source}")
    print("-" * 80)