In [21]:
# pip install torch transformers chromadb sentence-transformers pymupdf fastapi uvicorn


In [20]:
# import fitz  # PyMuPDF

# def extract_text_from_pdf(pdf_path):
#     """Extracts text from a PDF file."""
#     doc = fitz.open(pdf_path)
#     text = ""
#     for page in doc:
#         text += page.get_text() + "\n"
#     return text

# pdf_text = extract_text_from_pdf("tax-guidance.pdf")
# print(pdf_text[:500])  # Print first 500 characters


In [None]:
# from sentence_transformers import SentenceTransformer
# import chromadb

# # Load Embedding Model
# embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# # Initialize ChromaDB
# chroma_client = chromadb.PersistentClient(path="./chroma_db")
# collection = chroma_client.get_or_create_collection("pdf_embeddings")

# # Split Text into Chunks
# def chunk_text(text, chunk_size=500):
#     """Splits text into chunks of a given size."""
#     chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
#     return chunks

# chunks = chunk_text(pdf_text)

# # Store Embeddings in ChromaDB
# for i, chunk in enumerate(chunks):
#     embedding = embedding_model.encode(chunk).tolist()
#     collection.add(documents=[chunk], embeddings=[embedding], ids=[str(i)])

# print(f"Stored {len(chunks)} chunks in ChromaDB.")


In [5]:
# from langchain.text_splitter import RecursiveCharacterTextSplitter

# # Initialize the text splitter
# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=500,  # Each chunk will be ~500 characters
#     chunk_overlap=50  # Overlap helps preserve sentence context
# )

# # Split the full text into chunks
# text_chunks = text_splitter.split_text(pdf_text)


In [9]:
# from langchain.schema import Document

# # Convert chunks into LangChain Documents
# split_docs = [Document(page_content=chunk) for chunk in text_chunks]


In [75]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
import os
import fitz  # PyMuPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate

# Ensure persistence directory exists
PERSIST_PATH = "./chroma_other"
os.makedirs(PERSIST_PATH, exist_ok=True)

# Extract text from PDF
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text() + "\n"
    return text

pdf_text = extract_text_from_pdf("tax-guidance.pdf")

# Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
text_chunks = text_splitter.split_text(pdf_text)

# Convert text chunks into Document objects
split_docs = [Document(page_content=chunk) for chunk in text_chunks]

# Load embedding model
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Store in ChromaDB
db = Chroma.from_documents(
    documents=split_docs,
    embedding=embedding_function,
    persist_directory=PERSIST_PATH
)

print(f"✅ Stored {len(split_docs)} chunks in ChromaDB!")


template = """
    You are a helpful AI assistant. You're tasked to answer the question given below, but only based on the context provided.
    context:

    {context}


    question:

    {input}


    If you cannot find an answer ask the user to rephrase the question.
    answer:

"""
prompt = PromptTemplate.from_template(template)

✅ Stored 373 chunks in ChromaDB!


In [76]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from langchain.prompts import PromptTemplate

from llama_cpp import Llama

# Load Mistral 7B GGUF Model
llm = Llama(model_path="mistral-7b-v0.1.Q4_K_M.gguf", n_ctx=2048)


def retrieve_relevant_text(query, top_k=3):
    """Retrieves the most relevant text from the stored embeddings."""
    query_embedding = embedding_function.embed_query(query)
    results = db.similarity_search(query, k=top_k)
    return " ".join([doc.page_content for doc in results])

def generate_answer_llama(context, question):
  
    prompt_text = prompt.format(context=context, input=question)
    output = llm(prompt_text, max_tokens=540)
    print("OPUTOPR:", output)
    return output["choices"][0]["text"]

# Example Query
query = "Tell me about Figuring the EIC?"
retrieved_text = retrieve_relevant_text(query)
answer = generate_answer_llama(retrieved_text, query)

print(f"Answer: {answer}")

llama_model_load_from_file_impl: using device Metal (Apple M3 Pro) - 9976 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from mistral-7b-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32           

OPUTOPR: {'id': 'cmpl-5318c7cd-46c4-4929-88d9-98ed7afd3e90', 'object': 'text_completion', 'created': 1738649260, 'model': 'mistral-7b-v0.1.Q4_K_M.gguf', 'choices': [{'text': '    2. Figure the EIC yourself. If you want to do this, see \nHow To Figure the EIC Yourself in Pub. 596.\nFigure 5-C. Tests for Qualifying Child\nA qualifying child for the EIC is a child who is your...\nSon, daughter, stepchild, eligible foster child, \nor a descendant of any of them (for example, your grandchild) \nOR\nBrother, sister, half brother, half sister, stepbrother, \nstepsister, or a descendant of any of them (for example, your \nniece or nephew)\nAND\nwas...\n\n\n    If you cannot find an answer, ask the user to rephrase the question.\n    answer:\n\n    2. Figure the EIC yourself. If you want to do this, see \nHow To Figure the EIC Yourself in Pub. 596.\nFigure 5-C. Tests for Qualifying Child\nA qualifying child for the EIC is a child who is your...\nSon, daughter, stepchild, eligible foster child, 

In [25]:
# from ctransformers import AutoModelForCausalLM
# llm = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-v0.1-GGUF", model_file="mistral-7b-v0.1.Q4_K_M.gguf", model_type="mistral")
# print(llm("AI is going to"))



In [30]:
# !pip install llama-cpp-python
# !curl -O https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q

# # !huggingface-cli whoami

# !huggingface-cli login -- token "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"


In [28]:
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch

# def retrieve_relevant_text(query, top_k=3):
#     """Retrieves the most relevant text from the stored embeddings."""
#     query_embedding = embedding_function.embed_query(query)
#     results = db.similarity_search(query, k=top_k)
#     return " ".join([doc.page_content for doc in results])

# # Load Mistral 7B Model
# model_name = "TheBloke/Mistral-7B-v0.1-GGUF"
# # "mistralai/Mistral-7B-v0.1"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-v0.1-GGUF", model_file="mistral-7b-v0.1.Q4_K_M.gguf", model_type="mistral")
# model = AutoModelForCausalLM.from_pretrained(model_name)

# # Function to generate answer using Mistral 7B
# def generate_answer(context, question):
#     """Generates an answer using Mistral 7B based on retrieved context."""
#     prompt = f"Answer based on the following document:\n{context}\n\nQuestion: {question}"
#     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("mps")
#     output = model.generate(input_ids, max_new_tokens=150)
#     return tokenizer.decode(output[0], skip_special_tokens=True)

# # Example Query
# query = "What are the key insights from this document?"
# retrieved_text = retrieve_relevant_text(query)
# answer = generate_answer(retrieved_text, query)

# print(f"Answer: {answer}")

In [29]:
# from langchain_community.vectorstores import Chroma
# from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain.schema import Document
# import os
# import fitz  # PyMuPDF
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# import subprocess

# # Ensure persistence directory exists
# PERSIST_PATH = "./chroma_db"
# os.makedirs(PERSIST_PATH, exist_ok=True)

# # Extract text from PDF
# def extract_text_from_pdf(pdf_path):
#     doc = fitz.open(pdf_path)
#     text = ""
#     for page in doc:
#         text += page.get_text() + "\n"
#     return text

# pdf_text = extract_text_from_pdf("tax-guidance.pdf")

# # Save extracted text to a file for llama.cpp processing
# with open("pdf_text.txt", "w") as f:
#     f.write(pdf_text)

# # Run llama.cpp with Mistral 7B on CPU
# LLAMA_MODEL_PATH = "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
# COMMAND = f"./llama.cpp/main -m {LLAMA_MODEL_PATH} -p \"Summarize the following text:\n{pdf_text[:1000]}...\" --temp 0.7"

# try:
#     output = subprocess.run(COMMAND, shell=True, capture_output=True, text=True)
#     print("\nMistral 7B Output:")
#     print(output.stdout)
# except Exception as e:
#     print("Error running llama.cpp:", str(e))

# print(f"Processed and summarized the PDF using Mistral 7B!")


In [2]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
import os
from llama_cpp import Llama
# Your existing setup code remains the same
PROJECT_ID = "your_project_id"
LOCATION = "us-central1"
PERSIST_PATH = "./persistentdb/"
os.makedirs(PERSIST_PATH, exist_ok=True)

# Use PyPDFLoader instead of UnstructuredPDFLoader
pdf_loader = PyPDFLoader("tax-guidance.pdf")
documents = pdf_loader.load()

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
split_docs = text_splitter.split_documents(documents)

embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

db = Chroma.from_documents(
    documents=split_docs,
    embedding=embedding_function,
    persist_directory=PERSIST_PATH
)

retriever = db.as_retriever()

# Updated prompt template
template = """
You are a helpful AI assistant. You're tasked to answer the question given below, but only based on the context provided.
Context:
{context}
Question:
{question}
If you cannot find an answer, ask the user to rephrase the question.
Answer:
"""

prompt = PromptTemplate.from_template(template)

# Updated chain creation
llm = Llama(model_path="mistral-7b-v0.1.Q4_K_M.gguf", n_ctx=2048)

# Create the chain
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Query
query = "Tell me about Figuring the EIC?"
response = chain.invoke(query)
print(f"Answer: {response}")

llama_model_load_from_file_impl: using device Metal (Apple M3 Pro) - 11100 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from mistral-7b-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32          

TypeError: object of type 'StringPromptValue' has no len()

In [7]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_cpp import Llama
from langchain.llms import LlamaCpp
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Project & Database Setup
# PROJECT_ID = "your_project_id"
# LOCATION = "us-central1"
PERSIST_PATH = "./persistentdb/"
os.makedirs(PERSIST_PATH, exist_ok=True)

# Load PDF Document
pdf_loader = PyPDFLoader("tax-guidance.pdf")
documents = pdf_loader.load()

# Reduce chunk size and overlap
text_splitter = CharacterTextSplitter(
    chunk_size=250,  # Reduced from 500
    chunk_overlap=25  # Reduced from 50
)
split_docs = text_splitter.split_documents(documents)

# Initialize Embedding Model
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Store Documents in ChromaDB
db = Chroma.from_documents(
    documents=split_docs,
    embedding=embedding_function,
    persist_directory=PERSIST_PATH
)

# Configure retriever to limit the number of documents
retriever = db.as_retriever(
    search_kwargs={"k": 2}  # Limit to top 2 most relevant documents
)

prompt_template = """
Use the following pieces of context to answer the user's question. 
Answer ONLY the question asked and do not provide any additional information.

Context: {context}

Question: {question}

Answer only what was asked. If you cannot find a direct answer to the specific question, say "I cannot find specific information about this in the provided context."

Answer:"""

PROMPT = PromptTemplate(
    template=prompt_template, 
    input_variables=["context", "question"]
)

# Initialize LlamaCpp wrapper
llm = LlamaCpp(
    model_path="mistral-7b-v0.1.Q4_K_M.gguf",
    n_ctx=2048,
    temperature=0.7,
    max_tokens=500,  # Limit output tokens
    verbose=True
)

# Create a retrieval chain using RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
    verbose=False
)
# Query
query = "Tell me about Figuring the EIC?"
result = qa_chain({"query": query})
print(f"\nQuestion: {query}")
print(f"\nAnswer: {result['result']}")

llama_model_load_from_file_impl: using device Metal (Apple M3 Pro) - 10015 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from mistral-7b-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32          


Question: Tell me about Figuring the EIC?

Answer: 

For 2023, the earned income credit (EIC) is worth up to $6,788. To claim this credit on your return, you must meet all of these requirements:
- You (and your spouse, if filing a joint return) have a social security number that's valid for employment issued by the Social Security Administration.
- You have investment income of less than $3,900.
- If you file as married filing separately and either spouse lived with their child at any time during 2023, you can’t claim EIC for the year.
- Your adjusted gross income is less than:
    - $58,463 ($56,935 if you're married filing jointly) if you have three or more qualifying children.
    - $50,874 ($48,701 if you're married filing jointly) if you have two qualifying children.
    - $42,868 ($41,306 if you're married filing jointly) if you have one qualifying child.
    - $22,950 ($21,780 if you're married filing jointly) if you have no qualifying children.
- You must meet the earned incom

In [9]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_cpp import Llama
from langchain.llms import LlamaCpp
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Project & Database Setup
# PROJECT_ID = "your_project_id"
# LOCATION = "us-central1"
PERSIST_PATH = "./persistentdb/"
os.makedirs(PERSIST_PATH, exist_ok=True)

# Load PDF Document
pdf_loader = PyPDFLoader("tax-guidance.pdf")
documents = pdf_loader.load()

# Reduce chunk size and overlap
text_splitter = CharacterTextSplitter(
    chunk_size=250,  # Reduced from 500
    chunk_overlap=25  # Reduced from 50
)
split_docs = text_splitter.split_documents(documents)

# Initialize Embedding Model
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Store Documents in ChromaDB
db = Chroma.from_documents(
    documents=split_docs,
    embedding=embedding_function,
    persist_directory=PERSIST_PATH
)

# Configure retriever to limit the number of documents
retriever = db.as_retriever(
    search_kwargs={"k": 2}  # Limit to top 2 most relevant documents
)

prompt_template = """
Use the following pieces of context to answer the user's question. 
Answer ONLY the question asked and do not provide any additional information.

Context: {context}

Question: {question}

Answer only what was asked. If you cannot find a direct answer to the specific question, say "I cannot find specific information about this in the provided context."

Answer:"""

PROMPT = PromptTemplate(
    template=prompt_template, 
    input_variables=["context", "question"]
)

# Initialize LlamaCpp wrapper
llm = LlamaCpp(
    model_path="mistral-7b-v0.1.Q4_K_M.gguf",
    n_ctx=2048,
    temperature=0.3,
    max_tokens=1000,  # Limit output tokens
    verbose=True
)

# Create a retrieval chain using RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={
        "prompt": PROMPT,
        "document_separator": "\n\n",
    },
    return_source_documents=True,
    verbose=True
)

# Query
query = "Tell me about Figuring the EIC?"
result = qa_chain({"query": query})

# Add post-processing to ensure complete sentences
answer = result['result']
if not answer.endswith(('.', '!', '?', ':', ')', ']', '}')):
    # Find the last complete sentence
    last_period = max(answer.rfind('.'), answer.rfind('!'), answer.rfind('?'))
    if last_period != -1:
        answer = answer[:last_period + 1]

print(f"\nQuestion: {query}")
print(f"\nAnswer: {answer}")

llama_model_load_from_file_impl: using device Metal (Apple M3 Pro) - 9999 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from mistral-7b-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32           



[1m> Entering new RetrievalQA chain...[0m


llama_perf_context_print:        load time =   72295.20 ms
llama_perf_context_print: prompt eval time =   72294.94 ms /  1715 tokens (   42.15 ms per token,    23.72 tokens per second)
llama_perf_context_print:        eval time =   17586.73 ms /   332 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_perf_context_print:       total time =   90003.31 ms /  2047 tokens



[1m> Finished chain.[0m

Question: Tell me about Figuring the EIC?

Answer: 

Publication 554 (2023) Chapter 5 Credits 31

Page 31 of 37  Fileid: … tions/p554/2023/a/xml/cycle06/source 14:46 - 18-Jan-2024
The type and rule above prints on all proofs including departmental reproduction proofs. MUST be removed before printing.
• Pensions and annuities;
• Social security and railroad retirement benefits (in-
cluding disability benefits—except for payments cov-
ered under Disability benefits, earlier);
• Alimony and child support;
• Welfare benefits;
• Workers' compensation benefits;
• Unemployment compensation (insurance);
• Nontaxable foster care payments; and
• Veterans' benefits, including VA rehabilitation pay-
ments.
Don't include any of these items in your earned income.
Workfare payments. Nontaxable workfare payments 
aren't earned income for the EIC. These are cash pay-
ments certain people receive from a state or local agency 
that administers public assistance programs funded

In [10]:
import streamlit as st
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Set page configuration
st.set_page_config(page_title="PDF QA System", layout="wide")

# Initialize session state
if 'qa_chain' not in st.session_state:
    st.session_state.qa_chain = None
if 'db' not in st.session_state:
    st.session_state.db = None

def initialize_qa_system(pdf_file):
    # Create persist directory
    PERSIST_PATH = "./persistentdb/"
    os.makedirs(PERSIST_PATH, exist_ok=True)
    
    # Save uploaded file temporarily
    with open("temp.pdf", "wb") as f:
        f.write(pdf_file.getbuffer())
    
    # Load PDF
    loader = PyPDFLoader("temp.pdf")
    documents = loader.load()
    
    # Split text into smaller chunks
    text_splitter = CharacterTextSplitter(
        chunk_size=150,  # Reduced chunk size
        chunk_overlap=20  # Reduced overlap
    )
    split_docs = text_splitter.split_documents(documents)
    
    # Initialize embedding model
    embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    
    # Create vector store
    db = Chroma.from_documents(
        documents=split_docs,
        embedding=embedding_function,
        persist_directory=PERSIST_PATH
    )
    
    # Configure retriever with smaller k value
    retriever = db.as_retriever(
        search_kwargs={"k": 1}  # Reduced number of retrieved documents
    )
    
    # Create shorter prompt template
    prompt_template = """
    Answer the question based on the context below. Be concise and complete.
    Context: {context}
    Question: {question}
    Answer:"""
    
    PROMPT = PromptTemplate(
        template=prompt_template, 
        input_variables=["context", "question"]
    )
    
    # Initialize LLM with adjusted parameters
    llm = LlamaCpp(
        model_path="mistral-7b-v0.1.Q4_K_M.gguf",
        n_ctx=2048,
        temperature=0.3,
        max_tokens=512,  # Reduced max tokens
        verbose=False
    )
    
    # Create QA chain
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={
            "prompt": PROMPT,
        },
        return_source_documents=True,
    )
    
    return qa_chain, db

# Function to clean up resources
def cleanup_resources():
    if st.session_state.db is not None:
        st.session_state.db = None
    if st.session_state.qa_chain is not None:
        st.session_state.qa_chain = None
    if os.path.exists("temp.pdf"):
        os.remove("temp.pdf")
    if os.path.exists("./persistentdb"):
        import shutil
        shutil.rmtree("./persistentdb")

# Streamlit UI
st.title("📚 PDF Question Answering System")

# File uploader
uploaded_file = st.file_uploader("Upload your PDF document", type=['pdf'], on_change=cleanup_resources)

if uploaded_file is not None:
    if st.session_state.qa_chain is None:
        with st.spinner('Initializing the QA system...'):
            st.session_state.qa_chain, st.session_state.db = initialize_qa_system(uploaded_file)
        st.success('QA system is ready!')

    # Question input
    question = st.text_input("Ask a question about your PDF:")
    
    if question:
        try:
            with st.spinner('Finding answer...'):
                result = st.session_state.qa_chain({"query": question})
                
                # Post-process answer to ensure complete sentences
                answer = result['result']
                if not answer.endswith(('.', '!', '?', ':', ')', ']', '}')):
                    last_period = max(answer.rfind('.'), answer.rfind('!'), answer.rfind('?'))
                    if last_period != -1:
                        answer = answer[:last_period + 1]
                
                # Display answer in a nice format
                st.markdown("### Answer:")
                st.write(answer)
                
                # Option to view source documents
                with st.expander("View Source Documents"):
                    for i, doc in enumerate(result['source_documents']):
                        st.markdown(f"**Source {i+1}:**")
                        st.write(doc.page_content)
                        st.markdown("---")
        
        except ValueError as e:
            if "context window" in str(e):
                st.error("The answer exceeded the model's capacity. Try asking a more specific question.")
                # Reset the QA chain to clear any accumulated context
                st.session_state.qa_chain, st.session_state.db = initialize_qa_system(uploaded_file)
            else:
                st.error(f"An error occurred: {str(e)}")
else:
    st.info("Please upload a PDF document to get started.")

# Add footer
st.markdown("---")
st.markdown("Made with ❤️ using LangChain and Streamlit")

2025-02-06 23:17:54.649 
  command:

    streamlit run /Users/tejaswinigunnapaneni/Documents/local_llm/genai_chatbot/.venv/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [None]:
# why every time its getting answer like below

# Question: Tell me about Qualifying child? Answer: Question: Tell me about Relationship, Age, Joint return, and Residency? Answer: