In [None]:
!pip install langchain langchain-community fastapi uvicorn nest_asyncio unstructured chromadb


In [None]:
# import os
# from unstructured.partition.pdf import partition_pdf
# from base64 import b64decode
# from langchain_community.vectorstores import Chroma
# from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_community.document_loaders import TextLoader
# from langchain_core.runnables import RunnablePassthrough, RunnableLambda
# from langchain_core.output_parsers import StrOutputParser
# from langchain_groq import ChatGroq
# from langchain_core.documents import Document

# os.environ["GROQ_API_KEY"] = "<your-groq-api-key>"  # real key redacted: never commit credentials; revoke any key that was committed

# def parse_docs(docs):
#     b64 = []
#     text = []
#     for doc in docs:
#         try:
#             b64decode(doc)
#             b64.append(doc)
#         except Exception:
#             text.append(doc)
#     return {"images": b64, "texts": text}

# def build_prompt(kwargs):
#     docs_by_type = kwargs["context"]
#     user_question = kwargs["question"]

#     context_text = ""
#     if len(docs_by_type["texts"]) > 0:
#         for text_element in docs_by_type["texts"]:
#             context_text += str(text_element)

#     prompt_template = f"""
#     Answer the question based only on the following context, which can include text, tables, and the below image.
#     Context: {context_text}
#     Question: {user_question}
#     """
#     return prompt_template

# def process_pdf_and_answer(question, pdf_path):
#     chunks = partition_pdf(
#         filename=pdf_path,
#         infer_table_structure=True,
#         strategy="hi_res",
#         extract_image_block_types=["image"],
#         extract_image_block_to_payload=True,
#         chunking_strategy="by_title",
#         max_characters=10000,
#         combine_text_under_n_chars=2000,
#         new_after_n_chars=6000,
#     )

#     documents = [Document(page.text, metadata={"page_number": page.metadata.page_number}) for page in chunks]

#     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
#     split_docs = splitter.split_documents(documents)

#     vectorstore = Chroma.from_documents(split_docs, HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"))
#     retriever = vectorstore.as_retriever()

#     chain = (
#         {
#             "context": retriever | RunnableLambda(parse_docs),
#             "question": RunnablePassthrough(),
#         }
#         | RunnableLambda(build_prompt)
#         | ChatGroq(model="llama3-8b-8192")
#         | StrOutputParser()
#     )

#     answer = chain.invoke(question)
#     return answer


In [None]:
# from fastapi import FastAPI, File, UploadFile, Form, Query
# from fastapi.responses import JSONResponse
# from typing import Optional
# import shutil
# import os
# from rag_pipeline import process_pdf_and_answer

# app = FastAPI()

# @app.post("/chat-with-pdf/")
# async def chat_with_pdf(
#     question: str = Form(...),
#     pdf_file: Optional[UploadFile] = File(None),
#     doc_name: Optional[str] = Query(None)
# ):
#     if pdf_file:
#         file_location = f"./uploaded_pdfs/{pdf_file.filename}"
#         os.makedirs(os.path.dirname(file_location), exist_ok=True)
#         with open(file_location, "wb") as f:
#             shutil.copyfileobj(pdf_file.file, f)
#         response = process_pdf_and_answer(question, file_location)
#     elif doc_name:
#         existing_file_path = f"./uploaded_pdfs/{doc_name}"
#         if not os.path.exists(existing_file_path):
#             return JSONResponse(status_code=404, content={"message": "Document not found."})
#         response = process_pdf_and_answer(question, existing_file_path)
#     else:
#         return JSONResponse(status_code=400, content={"message": "Provide either a PDF file or document name."})

#     return {"question": question, "answer": response}


In [14]:
import os
from unstructured.partition.pdf import partition_pdf
from base64 import b64decode
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq
from langchain_core.documents import Document

# Never hardcode the Groq API key in the notebook: a committed key is leaked and
# must be revoked. Use the key already present in the environment, and only if it
# is missing ask for it interactively (getpass does not echo the input).
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass

    os.environ["GROQ_API_KEY"] = getpass("Enter your GROQ_API_KEY: ")

def parse_docs(docs):
    """Separate retrieved items into base64 payloads and everything else.

    Args:
        docs: Iterable of retrieved items (strings or arbitrary objects).

    Returns:
        A dict with two lists that preserve input order:
        ``"images"`` holds the items that are strictly valid base64 strings,
        ``"texts"`` holds every other item unchanged.
    """
    b64 = []
    text = []
    for doc in docs:
        try:
            # validate=True is essential: without it b64decode silently drops
            # characters outside the base64 alphabet, so ordinary prose such as
            # "some text" decodes "successfully" and is mistaken for an image.
            b64decode(doc, validate=True)
        except (ValueError, TypeError):
            # ValueError covers binascii.Error and non-ASCII strings;
            # TypeError covers items that are not str/bytes-like at all.
            text.append(doc)
        else:
            b64.append(doc)
    return {"images": b64, "texts": text}

def build_prompt(kwargs):
    """Render the final LLM prompt from retrieved context and the user question.

    Args:
        kwargs: Mapping with a ``"context"`` entry (a dict holding ``"texts"``
            and ``"images"`` sequences) and a ``"question"`` entry.

    Returns:
        The prompt string: fixed instructions, then the context (each text
        followed by a newline, then one ``<img>`` data-URI tag per image),
        then the question.
    """
    grouped = kwargs["context"]
    question = kwargs["question"]

    # Every text piece is stringified and terminated by a newline.
    text_section = "".join(str(piece) + "\n" for piece in grouped["texts"])

    # Each base64 payload is wrapped in an <img> tag carrying a PNG data URI.
    image_section = "".join(
        f"\n[Image]: <img src='data:image/png;base64,{encoded}' />"
        for encoded in grouped["images"]
    )

    context_text = text_section + image_section

    return (
        "\n"
        "You are an intelligent assistant that understands and interprets both text and visual information (e.g., tables, charts).\n"
        "Answer the following question using the given context, which may contain images embedded in base64.\n"
        "\n"
        f"Context: {context_text}\n"
        f"Question: {question}\n"
    )

def process_pdf_and_answer(question, pdf_path):
    """Answer ``question`` using content retrieved from the PDF at ``pdf_path``.

    The PDF is partitioned with ``partition_pdf``, the chunks are wrapped in
    ``Document`` objects, split into ~1000-character pieces, embedded into a
    Chroma collection, and the retrieved pieces are passed with the question
    to the Groq chat model.

    Args:
        question: The user's question; used both for retrieval and in the prompt.
        pdf_path: Filesystem path of the PDF to process.

    Returns:
        The model's answer as produced by ``StrOutputParser``.
    """
    import uuid

    image_marker = "[[IMAGE_BASE64]]"

    chunks = partition_pdf(
        filename=pdf_path,
        infer_table_structure=True,
        strategy="hi_res",
        extract_image_block_types=["image"],
        extract_image_block_to_payload=True,
        chunking_strategy="by_title",
        max_characters=10000,
        combine_text_under_n_chars=2000,
        new_after_n_chars=6000,
    )

    documents = []
    for chunk in chunks:
        content = chunk.text or ""
        # NOTE(review): this branch only fires if the element exposes an `image`
        # mapping with a "data" key. unstructured appears to store extracted image
        # payloads under metadata (e.g. `image_base64`) instead -- confirm; if so,
        # this branch never runs and no image reaches the prompt.
        if hasattr(chunk, "image") and chunk.image and "data" in chunk.image:
            image_data = chunk.image["data"]
            # Marker lets custom_parser separate the payload from the text later.
            content += f"\n{image_marker}{image_data}"
        # The attribute can exist but be None, which getattr's default does not
        # cover; fall back to 0 so the metadata value is always an int.
        page_number = getattr(chunk.metadata, "page_number", None) or 0
        documents.append(Document(page_content=content, metadata={"page_number": page_number}))

    # Prepare documents for vector search
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    split_docs = splitter.split_documents(documents)

    def custom_parser(docs):
        """Split retrieved documents into clean text and embedded base64 images."""
        b64_images = []
        clean_texts = []
        for doc in docs:
            # The retriever yields Document objects, not strings: the text lives
            # in `page_content`. Plain strings are still accepted as-is.
            content = getattr(doc, "page_content", doc)
            if image_marker in content:
                text_part, *images = content.split(image_marker)
                clean_texts.append(text_part.strip())
                b64_images.extend(img.strip() for img in images)
            else:
                clean_texts.append(content)
        return {"images": b64_images, "texts": clean_texts}

    # Use a collection that is unique to this call. With the default collection
    # name, repeated calls in the same process add to one shared collection, so
    # chunks from previously processed PDFs could be retrieved for this question.
    vectorstore = Chroma.from_documents(
        split_docs,
        HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
        collection_name=f"pdf-{uuid.uuid4().hex}",
    )
    try:
        retriever = vectorstore.as_retriever()

        chain = (
            {
                "context": retriever | RunnableLambda(custom_parser),
                "question": RunnablePassthrough(),
            }
            | RunnableLambda(build_prompt)
            | ChatGroq(model="llama3-8b-8192")
            | StrOutputParser()
        )

        return chain.invoke(question)
    finally:
        # Drop the per-call collection so memory does not grow with every request.
        vectorstore.delete_collection()


In [15]:
from fastapi import FastAPI, File, UploadFile, Form, Query
from fastapi.responses import JSONResponse
from typing import Optional
import shutil

app = FastAPI()

# Directory where uploaded PDFs are stored and looked up by name.
UPLOAD_DIR = "./uploaded_pdfs"


def _resolve_upload_path(name):
    """Map a client-supplied file name to a path directly inside UPLOAD_DIR.

    Directory components are discarded (both "/" and "\\" separators) so a name
    such as "../../etc/passwd" cannot escape UPLOAD_DIR.

    Returns:
        The path as a string, or None when no usable file name remains.
    """
    base_name = os.path.basename((name or "").replace("\\", "/"))
    if base_name in ("", ".", ".."):
        return None
    return f"{UPLOAD_DIR}/{base_name}"


@app.post("/chat-with-pdf/")
async def chat_with_pdf(
    question: str = Form(...),
    pdf_file: Optional[UploadFile] = File(None),
    doc_name: Optional[str] = Query(None)
):
    """Answer a question about an uploaded PDF or a previously stored one.

    Args:
        question: The question to answer (form field, required).
        pdf_file: Optional PDF upload; when present it is saved under
            UPLOAD_DIR and takes precedence over ``doc_name``.
        doc_name: Optional name of a PDF already stored under UPLOAD_DIR
            (query parameter).

    Returns:
        ``{"question": ..., "answer": ...}`` on success; a 400 JSON response when
        neither input is given or the upload has no usable file name; a 404 JSON
        response when ``doc_name`` does not refer to a stored file.
    """
    # NOTE: process_pdf_and_answer is synchronous and is called directly from this
    # coroutine, so the server handles no other request while it runs.
    if pdf_file:
        # The file name comes from the client: never use it as a path verbatim.
        file_location = _resolve_upload_path(pdf_file.filename)
        if file_location is None:
            return JSONResponse(status_code=400, content={"message": "Invalid PDF file name."})
        os.makedirs(UPLOAD_DIR, exist_ok=True)
        with open(file_location, "wb") as f:
            shutil.copyfileobj(pdf_file.file, f)
        response = process_pdf_and_answer(question, file_location)
    elif doc_name:
        existing_file_path = _resolve_upload_path(doc_name)
        # isfile (not exists) so that a directory is not treated as a document.
        if existing_file_path is None or not os.path.isfile(existing_file_path):
            return JSONResponse(status_code=404, content={"message": "Document not found."})
        response = process_pdf_and_answer(question, existing_file_path)
    else:
        return JSONResponse(status_code=400, content={"message": "Provide either a PDF file or document name."})

    return {"question": question, "answer": response}


In [16]:
import nest_asyncio
import uvicorn

# Patch asyncio so uvicorn can be started from inside the notebook kernel.
nest_asyncio.apply()

# Serve the FastAPI app on http://localhost:8000; this call occupies the cell
# until the server is shut down.
uvicorn.run(
    app=app,
    host="localhost",
    port=8000,
)


INFO:     Started server process [6472]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://localhost:8000 (Press CTRL+C to quit)


INFO:     ::1:54878 - "GET /docs HTTP/1.1" 200 OK
INFO:     ::1:54878 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     ::1:54879 - "POST /chat-with-pdf/?doc_name=Attention.pdf HTTP/1.1" 200 OK
INFO:     ::1:54896 - "POST /chat-with-pdf/?doc_name=Attention.pdf HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [6472]
