# Document Q&A Chatbot -- LangChain + Groq

In [23]:
pip install python-dotenv pinecone-client sentence-transformers langchain langchain-groq

In [27]:
import hashlib
import os

import pdfplumber
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Pinecone as LangPinecone
from langchain_groq import ChatGroq
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

In [28]:
# Load environment variables (values from a local .env file are merged in)
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

# os.getenv returns None for an unset variable. Stop here with a message that
# names the missing key instead of handing None to the Pinecone / Groq clients
# created in later cells.
_missing_keys = [
    name
    for name, value in (
        ("GROQ_API_KEY", groq_api_key),
        ("PINECONE_API_KEY", pinecone_api_key),
    )
    if not value
]
if _missing_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Set them in your shell or in a .env file."
    )

In [31]:
# Connect to Pinecone using the key read from the environment
pc = Pinecone(api_key=pinecone_api_key)

# Name of the index that stores the document embeddings
index_name = "doc-qa-index"

# Create the index only when it is not already listed for this account
existing_index_names = pc.list_indexes().names()
index_exists = index_name in existing_index_names
if not index_exists:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=384,  # all-MiniLM-L6-v2 has 384 dimensions
        metric="cosine",
        spec=serverless_spec,
    )

## PDF Loader + Chunking

In [40]:
# Load and chunk PDF
def load_and_split_pdf(file_path: str, chunk_size: int = 500, chunk_overlap: int = 50):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        file_path: Path of the PDF file to open with pdfplumber.
        chunk_size: Value passed to RecursiveCharacterTextSplitter as
            ``chunk_size``. Defaults to 500.
        chunk_overlap: Value passed to RecursiveCharacterTextSplitter as
            ``chunk_overlap``. Defaults to 50.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.create_documents`` returns
        for the joined page text (the chunked documents).
    """
    with pdfplumber.open(file_path) as pdf:
        # Extract each page exactly once; the text must be read while the
        # PDF is still open.
        page_texts = [page.extract_text() for page in pdf.pages]

    # Pages for which extract_text() gave nothing (None or "") are skipped.
    text = "\n".join(page_text for page_text in page_texts if page_text)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.create_documents([text])

# Load PDF
pdf_path = "ICMLA_329_Final.pdf"
docs = load_and_split_pdf(pdf_path)

# load_and_split_pdf drops pages without extractable text, so the chunk list
# may come back empty. Stop here rather than uploading nothing to the index.
if not docs:
    raise ValueError(f"No extractable text found in {pdf_path!r}; nothing to index.")

## Embed and Store in Pinecone

In [42]:
# Create embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Give every chunk a deterministic ID derived from its position and text, so
# that re-running this cell upserts onto the same vectors instead of adding
# duplicates under new IDs. The position keeps IDs unique within one upload
# even when two chunks have identical text.
chunk_ids = [
    hashlib.sha256(f"{position}:{doc.page_content}".encode("utf-8")).hexdigest()
    for position, doc in enumerate(docs)
]

# Upload to Pinecone
vectorstore = LangPinecone.from_documents(
    documents=docs,
    embedding=embedding_model,
    index_name=index_name,
    ids=chunk_ids,
)

## Ask Questions Using Groq

In [44]:
# Groq-hosted chat model
# NOTE(review): confirm that this model id is still offered by Groq.
llm = ChatGroq(
    api_key=groq_api_key,
    model_name="llama3-8b-8192",
    temperature=0,
)

# Retriever backed by the Pinecone vector store
retriever = vectorstore.as_retriever()

# RAG chain, configured to return the source documents with each answer
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

In [50]:
# Ask the chain a sample question
query = "What are the key findings in the document?"
result = qa_chain.invoke(query)

# Show the generated answer (stored under the "result" key)
answer_text = result["result"]
print("\nAnswer:\n", answer_text)


Answer:
 The key findings in the document are:

1. The deep learning-based enhancement model is capable of producing diagnostically valuable results, as confirmed by the example [24, 27].
2. The application of post-processing techniques such as sharpening, gamma correction, adjusting illumination, denoising, and white balancing can significantly enhance image clarity.
3. These post-processing techniques can improve contrast, fix uneven lighting, and reduce noise, making the images easier to interpret accurately, especially in clinical settings where precision matters.
4. The hybrid strategy of combining deep learning-based enhancement with post-processing techniques provides a good middle ground, balancing clarity, resolution, and adaptability, making it suitable for practical medical imaging applications.

Overall, the key findings suggest that the combination of deep learning-based enhancement and post-processing techniques can improve image quality and make it more suitable for med