In [1]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_pdf_file(directory_path):
    # Load all PDF files from the specified directory
    loader = DirectoryLoader(directory_path, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load() 
    return documents

In [None]:
extracted_data_mc = load_pdf_file("../test")
extracted_data = load_pdf_file("../data")

In [4]:
from typing import List
from langchain.schema import Document   



In [28]:
def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """
    Given a list of Document objects, return a new list of Document objects
    containing only 'source' in metadata and the original page_content.
    """
    minimal_docs: List[Document] = []
    for doc in docs:
        src = doc.metadata.get("source")
        minimal_docs.append(
            Document(
                page_content=doc.page_content,
                metadata={"source": src}
            )
        )
    return minimal_docs

In [50]:
minimal_docs_data = filter_to_minimal_docs(extracted_data)

In [6]:
def text_spliter(minimal_docs_data):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20,
    )
    text_chunks = text_splitter.split_documents(minimal_docs_data)
    return text_chunks

In [7]:
text_chunks = text_spliter(extracted_data_mc)
print(f"Number of text chunks: {len(text_chunks)}")

Number of text chunks: 0


In [56]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_embeddings():
    """
    Download and return the HuggingFace embeddings model.
   """
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

embeddings = download_embeddings()

In [62]:
from dotenv import load_dotenv
import os   
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [66]:
from pinecone import Pinecone

pinecone_api_key = PINECONE_API_KEY

pc = Pinecone(api_key=pinecone_api_key)
pc

<pinecone.pinecone.Pinecone at 0x2cb0528fd90>

In [70]:
from pinecone import ServerlessSpec
index_name = "medical-chatbot"

if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec= ServerlessSpec(cloud="aws", region="us-east-1"),        
    )

index = pc.Index(index_name)   

In [71]:
from langchain_pinecone import PineconeVectorStore
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embeddings,   
    index_name=index_name,    
)

In [72]:
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [95]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3}) 

In [94]:
from langchain_groq.chat_models import ChatGroq

chat_model = ChatGroq(model="llama-3.3-70b-versatile")


In [97]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [107]:
system_prompt = (
    "You are an Medical assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),  
    ("user", "{input}"),
])

In [99]:
question_answer_chain = create_stuff_documents_chain(chat_model,prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)




In [110]:
response = rag_chain.invoke({"input": "What are the symptoms of diabetes?"})
response['answer']

'The symptoms of diabetes include frequent urination, tiredness, excessive thirst, and hunger. Additional symptoms may include weight loss, fatigue, nausea, blurred vision, and frequent infections. Other symptoms can vary by gender, such as genital itching in women and impotence in men, as well as frequent yeast infections.'