In [None]:
#  Imports & Setup
import os
from dotenv import load_dotenv
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec

from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

#  Load environment
load_dotenv()
# Required: this key is handed to the Pinecone client further down, so a
# missing value should fail here, early, with a KeyError.
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
# Optional: no visible code reads this token, so a missing variable must not
# abort the whole run. The name stays defined (None when unset).
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

#  Step 1: Load PDF Documents
def load_pdf_file(data_path):
    """Load the files matching ``*.pdf`` under ``data_path``.

    Each match is read with ``PyPDFLoader``; the result is whatever
    ``DirectoryLoader.load()`` returns for them.
    """
    pdf_loader = DirectoryLoader(
        data_path,
        glob="*.pdf",
        loader_cls=PyPDFLoader,
    )
    documents = pdf_loader.load()
    return documents

extracted_data = load_pdf_file("Data/")

#  Step 2: Split into Chunks
# Break the loaded documents into overlapping chunks before embedding them.
splitter_options = {"chunk_size": 500, "chunk_overlap": 20}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
text_chunks = text_splitter.split_documents(extracted_data)
print("Chunks:", len(text_chunks))

#  Step 3: Download Sentence Embeddings
# Model id kept in one place so it is easy to spot next to the index settings.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

#  Step 4: Setup Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "medicalbot"

# Create the index only when no index with this name is listed yet.
existing_index_names = pc.list_indexes().names()
index_missing = index_name not in existing_index_names
if index_missing:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=384,  # presumably the embedding model's vector size -- confirm
        metric="cosine",
        spec=serverless_spec,
    )

#  Step 5: Push data to Pinecone
# from_documents() is called without explicit ids, so every call upserts the
# whole corpus as new vectors. Re-running this cell against a populated index
# would therefore store each chunk again and make the retriever return
# duplicates. Ingest only when the index is still empty; otherwise just attach.
# NOTE: to re-ingest after the PDFs change, clear or delete the index first.
index_stats = pc.Index(index_name).describe_index_stats()
if index_stats.total_vector_count:
    vectorstore = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embeddings,
    )
else:
    vectorstore = PineconeVectorStore.from_documents(
        documents=text_chunks,
        index_name=index_name,
        embedding=embeddings,
    )

#  Step 6: Set retriever
# Similarity search with k=3 passed through as the search keyword arguments.
retriever_options = {"k": 3}
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=retriever_options,
)

#  Step 7: Local LLM using HuggingFace
# Tokenizer and model are loaded from the same checkpoint id.
llm_checkpoint = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(llm_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(llm_checkpoint)
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
)

class LocalLLMWrapper:
    """Adapter exposing a text2text pipeline as a step usable in a chain.

    The wrapped ``pipe`` is called as ``pipe(text, max_new_tokens=100)`` and
    must return a list whose first item has a ``"generated_text"`` entry.

    Instances are callable so that LangChain can coerce them into a chain
    step; a plain object that only offers ``invoke`` is rejected when the
    chain is composed.
    """

    def __init__(self, pipe):
        """Store the generation pipeline to delegate to."""
        self.pipe = pipe

    def invoke(self, prompt):
        """Generate text for ``prompt`` and return it as a string.

        ``prompt`` may be a plain string or a prompt-value object offering
        ``to_string()`` (what a prompt template hands to the next step).
        """
        # The pipeline needs plain text, so unwrap prompt-value objects first.
        to_string = getattr(prompt, "to_string", None)
        prompt_text = to_string() if callable(to_string) else prompt
        return self.pipe(prompt_text, max_new_tokens=100)[0]["generated_text"]

    def __call__(self, prompt):
        """Alias for :meth:`invoke` so the wrapper can be used as a callable."""
        return self.invoke(prompt)

local_llm = LocalLLMWrapper(pipe)

#  Step 8: Build the RAG pipeline
# Placeholders: {context} in the system message, {input} in the human message.
system_prompt = "".join([
    "You are an assistant for question-answering tasks. ",
    "Use the following retrieved context to answer the question. ",
    "If you don't know the answer, say 'I don't know'. ",
    "Be concise and clear (max 3 sentences).\n\n{context}",
])

chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

# First build the answer-generation chain, then put the retriever in front of it.
question_answer_chain = create_stuff_documents_chain(local_llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

#  Step 9: Ask questions
questions = [
    "What is Acromegaly and gigantism?",
    "What is acne?",
    "What is diabetes?",
    "What is statistics?",
]

# Run each question through the chain and print it next to its answer.
for q in questions:
    chain_input = {"input": q}
    response = rag_chain.invoke(chain_input)
    print(f"\n Q: {q}\n A: {response['answer']}")

In [None]:
 # Output (abridged: only two of the four answers are shown)
 Q: What is Acromegaly and gigantism?
 A: Acromegaly and gigantism are disorders that involve overproduction of growth hormone...

 Q: What is statistics?
 A: I don't know.