# File QA RAG Chatbot App with ChatGPT, LangChain and Chainlit

Here we will implement an advanced RAG System with ChatGPT, LangChain and Chainlit to build a File QA UI-based chatbot with the following features:

- PDF Document Upload and Indexing
- RAG System for query analysis and response
- Result streaming capabilities (Real-time output)
- Show document sources of the answer from RAG system

## Install App and LLM dependencies

In [1]:
!pip install langchain==0.3.11
!pip install langchain-openai==0.2.12
!pip install langchain-community==0.3.11
!pip install chainlit==1.3.2
!pip install pyngrok==7.2.2
!pip install PyMuPDF==1.24.0
!pip install chromadb==0.6.3
!pip install pydantic==2.10.1
!pip install langchain-chroma==0.2.2

Collecting langchain==0.3.11
  Downloading langchain-0.3.11-py3-none-any.whl.metadata (7.1 kB)
Collecting langsmith<0.3,>=0.1.17 (from langchain==0.3.11)
  Downloading langsmith-0.2.11-py3-none-any.whl.metadata (14 kB)
Collecting numpy<2,>=1.22.4 (from langchain==0.3.11)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Downloading langchain-0.3.11-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m52.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langsmith-0.2.11-py3-none-any.whl (326 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m326.9/326.9 kB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Collecting pyngrok==7.2.2
  Downloading pyngrok-7.2.2-py3-none-any.whl.metadata (8.4 kB)
Downloading pyngrok-7.2.2-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.2
Collecting PyMuPDF==1.24.0
  Downloading PyMuPDF-1.24.0-cp311-none-manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting PyMuPDFb==1.24.0 (from PyMuPDF==1.24.0)
  Downloading PyMuPDFb-1.24.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.4 kB)
Downloading PyMuPDF-1.24.0-cp311-none-manylinux2014_x86_64.whl (3.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.9/3.9 MB[0m [31m88.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyMuPDFb-1.24.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (30.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.8/30.8 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMuPDFb, PyMuPDF
Successfully installed PyMuPDF-1.24.0 PyMuPDFb-1.24.0
Co

## Load OpenAI API Credentials

Here we load the credentials from a local file so that we don't accidentally expose them on the internet.

In [2]:
import locale

# Force UTF-8 as the preferred encoding for the rest of this session.
# NOTE(review): presumably the usual Colab workaround for "A UTF-8 locale is
# required" errors when running `!` shell commands — confirm.
# The replacement keeps the stdlib signature (optional ``do_setlocale``) so
# callers that use ``locale.getpreferredencoding(False)`` do not raise
# TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [3]:
import yaml

# Read the API credentials from the local YAML file into ``api_creds``;
# later cells look up the 'openai_key' and 'ngrok_key' entries.
with open('api_keys.yml') as creds_file:
    api_creds = yaml.safe_load(creds_file)

In [4]:
api_creds.keys()

dict_keys(['openai_key', 'ngrok_key'])

In [5]:
import os

# Export the OpenAI key as an environment variable. app.py builds ChatOpenAI
# and OpenAIEmbeddings without passing a key explicitly, so they presumably
# read OPENAI_API_KEY from the environment — confirm against the
# langchain-openai documentation.
os.environ['OPENAI_API_KEY'] = api_creds['openai_key']

## Write the app code here and store it in a py file

In [6]:
# the following line is a magic command
# that will write all the code below it to the python file app.py
# we will then deploy this app.py file on the cloud server where colab is running
# if you have your own server you can just write the code in app.py and deploy it directly
%%writefile app.py

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.chroma import Chroma
from langchain.schema import StrOutputParser
import chainlit as cl
import chromadb
import tempfile
import os
import pandas as pd

# Step 1: Load PDFs and embed with company tag
def configure_retriever(uploaded_files):
    """Index the uploaded PDFs and build a retriever over them.

    Each upload is copied into a scratch directory under its original file
    name, parsed with PyMuPDFLoader, and every resulting document is tagged
    with a ``company`` metadata value (the lower-cased file name up to its
    first dot). The documents are then split into overlapping chunks and
    embedded into a Chroma collection.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing ``name``
            (original file name) and ``path`` (location on disk).

    Returns:
        A ``(retriever, doc_chunks)`` tuple: a retriever configured to return
        the 3 best-matching chunks, and the list of all chunks produced by
        the splitter.
    """
    import shutil
    import uuid

    docs = []
    # The context manager removes the scratch copies deterministically instead
    # of relying on garbage collection of the TemporaryDirectory object. The
    # loader reads each file eagerly inside the block, so nothing needs the
    # copies afterwards.
    with tempfile.TemporaryDirectory() as temp_dir:
        for file in uploaded_files:
            # basename() keeps a crafted upload name (e.g. "../x.pdf") from
            # escaping the scratch directory.
            temp_filepath = os.path.join(temp_dir, os.path.basename(file.name))
            shutil.copyfile(file.path, temp_filepath)
            file_docs = PyMuPDFLoader(temp_filepath).load()
            company_name = file.name.split('.')[0].lower()
            for doc in file_docs:
                doc.metadata["company"] = company_name
            docs.extend(file_docs)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
    doc_chunks = text_splitter.split_documents(docs)

    embeddings_model = OpenAIEmbeddings()
    client = chromadb.PersistentClient(path="./chroma_db")

    # Use a fresh collection per call. With one fixed name on a persistent
    # client, every chat session would add to — and retrieve from — the same
    # collection, mixing in chunks from earlier uploads.
    vectordb = Chroma.from_documents(
        documents=doc_chunks,
        embedding=embeddings_model,
        client=client,
        collection_name=f"document_collection_{uuid.uuid4().hex}",
    )

    retriever = vectordb.as_retriever(search_kwargs={"k": 3})
    return retriever, doc_chunks

@cl.on_chat_start
async def when_chat_starts():
    """Ask for PDF uploads, index them and prepare the session state.

    Stores three values in the Chainlit user session for the message handler:
    ``retriever``, ``raw_chunks`` (all document chunks) and ``llm``.
    """
    uploaded_files = None
    # send() yields None when no file arrives (e.g. the prompt timed out),
    # so keep asking until something has been uploaded.
    while uploaded_files is None:
        uploaded_files = await cl.AskFileMessage(
            content="Upload SEC filings (PDFs like 10-Q/10-K).",
            accept=["application/pdf"],
            max_size_mb=20, max_files=5, timeout=180
        ).send()

    msg = cl.Message(content="Processing files...")
    await msg.send()
    await cl.sleep(2)

    # configure_retriever is synchronous (PDF parsing plus embedding calls).
    # Run it in a worker thread so the event loop — and every other chat
    # session — stays responsive while the files are indexed.
    retriever, doc_chunks = await cl.make_async(configure_retriever)(uploaded_files)
    cl.user_session.set("retriever", retriever)
    cl.user_session.set("raw_chunks", doc_chunks)

    llm = ChatOpenAI(model_name='gpt-4o-mini', temperature=0.1)
    cl.user_session.set("llm", llm)

    msg.content = "✅ Processing complete. You can now ask questions!"
    await msg.update()

@cl.on_message
async def on_user_message(message: cl.Message):
    """Answer a user question from the indexed filings.

    When the question contains "compare" and names at least two of the known
    companies, the two best-matching chunks per company are selected by
    dot-product similarity over that company's chunks. Otherwise the session
    retriever picks the context. The chunks that were actually sent to the
    model are attached to the reply as a "Sources" table.

    Args:
        message: The incoming Chainlit message; ``message.content`` is the
            user's question.
    """
    retriever = cl.user_session.get("retriever")
    doc_chunks = cl.user_session.get("raw_chunks")
    llm = cl.user_session.get("llm")
    query = message.content.lower()

    companies = ["oracle", "microsoft", "salesforce", "workday"]
    mentioned = [c for c in companies if c in query]

    msg = cl.Message(content="Working on your query...")
    await msg.send()

    if "compare" in query and len(mentioned) >= 2:
        embedder = OpenAIEmbeddings()
        query_vec = await embedder.aembed_query(query)
        context_docs = []
        for company in mentioned:
            # Substring match, so a file named e.g. "oracle_10k.pdf" still
            # counts as an "oracle" document.
            docs = [d for d in doc_chunks if company in d.metadata.get("company", "")]
            if not docs:
                continue
            # Embed all candidate chunks in one batched request rather than
            # one request per chunk inside the sort key.
            doc_vecs = await embedder.aembed_documents([d.page_content for d in docs])
            # Embeddings come back as plain lists of floats, so the dot
            # product is computed explicitly (lists support neither unary
            # minus nor the @ operator).
            scores = [
                sum(q * v for q, v in zip(query_vec, vec))
                for vec in doc_vecs
            ]
            ranked = sorted(zip(scores, docs), key=lambda pair: pair[0], reverse=True)
            context_docs.extend(doc for _, doc in ranked[:2])
    else:
        # Async retrieval keeps the event loop free and replaces the
        # deprecated synchronous get_relevant_documents().
        context_docs = await retriever.ainvoke(query)

    context = "\n\n".join([d.page_content for d in context_docs])
    qa_prompt = f"""
Use the following context extracted from SEC filings to answer the question.
Only compare values if you find relevant data from multiple companies.

{context}

Question: {message.content}
"""

    print("\n--- DEBUG ---\nPrompt:\n", qa_prompt[:2000])  # Short preview

    response = await llm.ainvoke([{"role": "user", "content": qa_prompt}])
    # Replace the placeholder text with the answer; streaming the answer as a
    # token would append it after "Working on your query...".
    msg.content = response.content

    # Report the chunks that were actually used as context. A callback handler
    # would never fire here, because no chain or retriever is given callbacks.
    sources = [
        {
            "source": d.metadata.get("source", "unknown"),
            "page": d.metadata.get("page", "NA"),
            "content": d.page_content[:200],
        }
        for d in context_docs
    ]
    if sources:
        sources_table = pd.DataFrame(sources).to_markdown()
        msg.elements.append(cl.Text(name="Sources", content=sources_table, display="inline"))

    await msg.update()

Writing app.py


## Start the app

In [7]:
!chainlit run app.py --port=8989 --watch &>./logs.txt &

## Change the Initial app screen

In [8]:
%%writefile chainlit.md

Welcome to Finance QA RAG Chatbot 🤖

Please ask your question?

Overwriting chainlit.md


In [9]:
from pyngrok import ngrok
import yaml

# Terminate any tunnels that are still open
ngrok.kill()

# Set the authtoken, taken from the `ngrok_key` entry of the credentials
# already loaded into `api_creds`
ngrok.set_auth_token(api_creds['ngrok_key'])

# Open a tunnel to port 8989, the port given to `chainlit run` when the app
# was started, and print its public URL
ngrok_tunnel = ngrok.connect(8989)
print("Chainlit App:", ngrok_tunnel.public_url)

Chainlit App: https://787e-34-126-163-212.ngrok-free.app


In [None]:
from pyngrok import ngrok
import yaml

# Terminate any tunnels that are still open. This is the only statement in
# this cell that runs; everything below is disabled.
ngrok.kill()

# Disabled alternative: read the authtoken from a separate
# `ngrok_credentials.yml` file and open the tunnel on port 8989.
#with open('ngrok_credentials.yml', 'r') as file:
  #ngrok_creds = yaml.safe_load(file)  # Load ngrok credentials into ngrok_creds
#ngrok.set_auth_token(ngrok_creds['ngrok_key'])  # Access ngrok_key from ngrok_creds

#ngrok_tunnel = ngrok.connect(8989)
#print("Chainlit App:", ngrok_tunnel.public_url)

## Remove running app processes

In [None]:
ngrok.kill()

In [None]:
!ps -ef | grep app

root           7       1  2 10:57 ?        00:00:06 /tools/node/bin/node /datalab/web/app.js
root         110       7  1 10:57 ?        00:00:04 /usr/bin/python3 /usr/local/bin/jupyter-notebook --debug --transport="ipc" --ip=172.28.0.12 --NotebookApp.token= --port=9000 --FileContentsManager.root_dir=/ --MappingKernelManager.root_dir=/content
root        1443     617  0 11:01 ?        00:00:00 /bin/bash -c ps -ef | grep app
root        1445    1443  0 11:01 ?        00:00:00 grep app


In [None]:
!sudo kill -9 6651

kill: (6651): No such process


In [None]:
# the following line is a magic command
# that will write all the code below it to the python file app.py
# we will then deploy this app.py file on the cloud server where colab is running
# if you have your own server you can just write the code in app.py and deploy it directly
%%writefile app.py


from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain.schema.runnable.config import RunnableConfig
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import StrOutputParser
from langchain_community.vectorstores.chroma import Chroma
from operator import itemgetter
import chainlit as cl
import chromadb
import tempfile
import os
import pandas as pd

# Load and prepare documents for retrieval
def configure_retriever(uploaded_files):
    """Index the uploaded PDFs and build a retriever over them.

    Each upload is copied into a scratch directory under its original file
    name and parsed with PyMuPDFLoader. The documents are then split into
    overlapping chunks and embedded into a Chroma collection.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing ``name``
            (original file name) and ``path`` (location on disk).

    Returns:
        A ``(retriever, doc_chunks)`` tuple: a retriever configured to return
        the 3 best-matching chunks, and the list of all chunks produced by
        the splitter.
    """
    import shutil
    import uuid

    docs = []
    # The context manager removes the scratch copies deterministically instead
    # of relying on garbage collection of the TemporaryDirectory object. The
    # loader reads each file eagerly inside the block, so nothing needs the
    # copies afterwards.
    with tempfile.TemporaryDirectory() as temp_dir:
        for file in uploaded_files:
            # basename() keeps a crafted upload name (e.g. "../x.pdf") from
            # escaping the scratch directory.
            temp_filepath = os.path.join(temp_dir, os.path.basename(file.name))
            shutil.copyfile(file.path, temp_filepath)
            docs.extend(PyMuPDFLoader(temp_filepath).load())

    # Split into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
    doc_chunks = text_splitter.split_documents(docs)

    # Store embeddings in vector DB
    embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")
    client = chromadb.PersistentClient(path="./chroma_db")

    # Use a fresh collection per call. With one fixed name on a persistent
    # client, every chat session would add to — and retrieve from — the same
    # collection, mixing in chunks from earlier uploads.
    vectordb = Chroma.from_documents(
        documents=doc_chunks,
        embedding=embeddings_model,
        client=client,
        collection_name=f"document_collection_{uuid.uuid4().hex}",
    )
    retriever = vectordb.as_retriever(search_kwargs={"k": 3})
    return retriever, doc_chunks

@cl.on_chat_start
async def when_chat_starts():
    """Ask for PDF uploads, index them and build the RAG chain for the session.

    Stores three values in the Chainlit user session for the message handler:
    ``qa_rag_chain`` (retriever -> prompt -> chat model -> string output),
    ``retriever`` and ``raw_docs`` (all document chunks).
    """
    uploaded_files = None
    # send() yields None when no file arrives (e.g. the prompt timed out),
    # so keep asking until something has been uploaded.
    while uploaded_files is None:
        uploaded_files = await cl.AskFileMessage(
            content="Please upload SEC filing PDFs (e.g., 10-Q/10-K).",
            accept=["application/pdf"],
            max_size_mb=20, max_files=5, timeout=180
        ).send()

    msg = cl.Message(content="Processing uploaded files...")
    await msg.send()
    await cl.sleep(2)

    # configure_retriever is synchronous (PDF parsing plus embedding calls).
    # Run it in a worker thread so the event loop — and every other chat
    # session — stays responsive while the files are indexed.
    retriever, doc_chunks = await cl.make_async(configure_retriever)(uploaded_files)

    chatgpt = ChatOpenAI(model_name='gpt-4o-mini', temperature=0.1, streaming=True)

    qa_template = """
    Use only the following pieces of context to answer the question at the end.
    If you don't know the answer, just say you don't know.

    {context}

    Question: {question}
    """
    qa_prompt = ChatPromptTemplate.from_template(qa_template)

    def format_docs(docs):
        """Join the retrieved chunks into one context string."""
        return "\n\n".join([d.page_content for d in docs])

    # The question is routed both to the retriever (to build the context) and
    # straight through to the prompt.
    qa_rag_chain = (
        {
            "context": itemgetter("question") | retriever | format_docs,
            "question": itemgetter("question")
        }
        | qa_prompt
        | chatgpt
        | StrOutputParser()
    )

    cl.user_session.set("qa_rag_chain", qa_rag_chain)
    cl.user_session.set("retriever", retriever)
    cl.user_session.set("raw_docs", doc_chunks)

    msg.content = "You're ready! Ask questions about the uploaded filings."
    await msg.update()

@cl.on_message
async def on_user_message(message: cl.Message):
    """Answer a question twice and show both answers side by side.

    The first answer comes from the session's RAG chain. The second is a
    prompt-only baseline that feeds the first three stored chunks to the
    model, with no retrieval. The reply is a markdown comparison of both,
    plus a table of the chunks the RAG retriever returned.

    Args:
        message: The incoming Chainlit message; ``message.content`` is the
            user's question.
    """
    qa_rag_chain = cl.user_session.get("qa_rag_chain")
    raw_docs = cl.user_session.get("raw_docs")

    msg = cl.Message(content="Answering your query...")
    await msg.send()

    class PostMessageHandler(BaseCallbackHandler):
        """Collect source, page and a short preview of each retrieved chunk."""

        def __init__(self):
            super().__init__()
            self.sources = []

        def on_retriever_end(self, documents, **kwargs):
            for d in documents:
                self.sources.append({
                    "source": d.metadata.get("source", "unknown"),
                    "page": d.metadata.get("page", "NA"),
                    "content": d.page_content[:200]
                })

    handler = PostMessageHandler()

    # RAG-based answer. astream() returns the async iterator that `async for`
    # requires; stream() returns a plain generator and would raise TypeError.
    rag_tokens = []
    async for token in qa_rag_chain.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[handler]),
    ):
        rag_tokens.append(token)
    rag_answer = "".join(rag_tokens)

    # Prompt-only baseline using first 3 chunks
    top_chunks = raw_docs[:3]
    context = "\n\n".join([d.page_content for d in top_chunks])
    chatgpt = ChatOpenAI(model_name='gpt-4o-mini', temperature=0.1)

    prompt_only_template = """
    Use the following SEC filing excerpts to answer the question below.
    If unsure, say "Not found in document".

    {context}

    Question: {question}
    """
    prompt_only_text = prompt_only_template.format(context=context, question=message.content)
    # Awaited so the model call does not block the event loop.
    baseline_reply = await chatgpt.ainvoke([{"role": "user", "content": prompt_only_text}])
    prompt_only_response = baseline_reply.content

    # Comparison markdown
    answer_md = f"""### ❓ Question
{message.content}

---

### 🔍 RAG-based Answer
{rag_answer}

---

### 📄 Prompt-only Answer
{prompt_only_response}
"""

    msg.content = answer_md
    if handler.sources:
        sources_table = pd.DataFrame(handler.sources[:3]).to_markdown()
        msg.elements.append(cl.Text(name="Top RAG Sources", content=sources_table, display="inline"))
    await msg.update()

Writing app.py
