In [13]:
!pip install langchain==0.3.11
!pip install langchain-openai==0.2.12
!pip install langchain-community==0.3.11
!pip install chainlit==1.3.2
!pip install pyngrok==7.2.2
!pip install PyMuPDF==1.24.0
!pip install chromadb==0.5.23

Collecting chainlit==1.3.2
  Using cached chainlit-1.3.2-py3-none-any.whl.metadata (6.8 kB)
Collecting asyncer<0.0.8,>=0.0.7 (from chainlit==1.3.2)
  Using cached asyncer-0.0.7-py3-none-any.whl.metadata (6.6 kB)
Collecting fastapi<0.116,>=0.115.3 (from chainlit==1.3.2)
  Using cached fastapi-0.115.14-py3-none-any.whl.metadata (27 kB)
Collecting literalai==0.0.623 (from chainlit==1.3.2)
  Using cached literalai-0.0.623-py3-none-any.whl
Collecting python-multipart<0.0.10,>=0.0.9 (from chainlit==1.3.2)
  Using cached python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)
Collecting starlette<0.42.0,>=0.41.2 (from chainlit==1.3.2)
  Using cached starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Collecting uvicorn<0.26.0,>=0.25.0 (from chainlit==1.3.2)
  Using cached uvicorn-0.25.0-py3-none-any.whl.metadata (6.4 kB)
Collecting opentelemetry-api~=1.35.0 (from uptrace<2.0.0,>=1.22.0->chainlit==1.3.2)
  Using cached opentelemetry_api-1.35.0-py3-none-any.whl.metadata (1.5 kB)
Collecting op

In [2]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [4]:
import yaml

with open('chatgpt_api_credentials.yml', 'r') as file:
    api_creds = yaml.safe_load(file)

In [5]:
api_creds.keys()

dict_keys(['openai_api_key'])

In [7]:
import os

os.environ['OPENAI_API_KEY'] = api_creds['openai_api_key']

In [19]:
%%writefile app.py

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain.schema.runnable.config import RunnableConfig
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import StrOutputParser
from langchain_community.vectorstores.chroma import Chroma
from operator import itemgetter
import chainlit as cl
import tempfile
import os 
import pandas as pd

# Takes uploaded PDFs, creates document chunks, computes embeddings
# Stores document chunks and embeddings in a Vector DB
# Returns a retriever which can look up the Vector DB
# to return documents based on user input
def configure_retriever(uploaded_files):
  # Read documents
  docs = []
  temp_dir = tempfile.TemporaryDirectory()
  for file in uploaded_files:
    temp_filepath = os.path.join(temp_dir.name, file.name)
    with open(temp_filepath, "wb") as f:
      with open(file.path, 'rb') as infile:
        f.write(infile.read())
    loader = PyMuPDFLoader(temp_filepath)
    docs.extend(loader.load())

  # Split into documents chunks
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500,
                                                 chunk_overlap=200)
  doc_chunks = text_splitter.split_documents(docs)

  # Create document embeddings and store in Vector DB
  embeddings_model = OpenAIEmbeddings()
  vectordb = Chroma.from_documents(doc_chunks, embeddings_model)

  # Define retriever object
  retriever = vectordb.as_retriever()
  return retriever

@cl.on_chat_start
# this function is called when the app starts for the first time
async def when_chat_starts():
  # Create UI element to accept PDF uploads
  uploaded_files = None
  # Wait for the user to upload a file
  while uploaded_files == None:
    uploaded_files = await cl.AskFileMessage(
      content="Please upload PDF documents to continue.",
      accept=["application/pdf"],
      max_size_mb=20, max_files=5, timeout=180
    ).send()

  msg = cl.Message(content=f"Processing files please wait...", disable_feedback=True)
  await msg.send()
  await cl.sleep(2)
  # Create retriever object based on uploaded PDFs
  retriever = configure_retriever(uploaded_files)
  msg = cl.Message(content=f"Processing completed. You can now ask questions!") #, disable_feedback=True)
  await msg.send()

  # Load a connection to ChatGPT LLM
  chatgpt = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.1,
                      streaming=True)

  # Create a prompt template for QA RAG System
  qa_template = """
                Use only the following pieces of context to answer the question at the end.
                If you don't know the answer, just say that you don't know,
                don't try to make up an answer. Keep the answer as concise as possible.

                {context}

                Question: {question}
                """
  qa_prompt = ChatPromptTemplate.from_template(qa_template)

  # This function formats retrieved documents before sending to LLM
  def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

  # Create a QA RAG System Chain
  qa_rag_chain = (
    {
      "context": itemgetter("question") # based on the user question get context docs
        |
      retriever
        |
      format_docs,
      "question": itemgetter("question") # user question
    }
      |
    qa_prompt # prompt with above user question and context
      |
    chatgpt # above prompt is sent to the LLM for response
      |
    StrOutputParser() # to parse the output to show on UI
  )
  # Set session variables to be accessed when user enters prompts in the app
  cl.user_session.set("qa_rag_chain", qa_rag_chain)


@cl.on_message
# this function is called whenever the user sends a prompt message in the app
async def on_user_message(message: cl.Message):

  # get the chain and memory objects from the session variables
  qa_rag_chain = cl.user_session.get("qa_rag_chain")

  # this will store the response from ChatGPT LLM
  chatgpt_message = cl.Message(content="")

  #Callback handler for handling the retriever and LLM processes.
  # Used to post the sources of the retrieved documents as a Chainlit element.
  class PostMessageHandler(BaseCallbackHandler):
    def __init__(self, msg: cl.Message):
      BaseCallbackHandler.__init__(self)
      self.msg = msg
      self.sources = []

    def on_retriever_end(self, documents, *, run_id, parent_run_id, **kwargs):
      source_ids = []
      for d in documents: # retrieved documents from retriever based on user query
        metadata = {
          "source": d.metadata["source"],
          "page": d.metadata["page"],
          "content": d.page_content[:200]
        }
        idx = (metadata["source"], metadata["page"])
        if idx not in source_ids: # store unique source documents
          source_ids.append(idx)
          self.sources.append(metadata)

    def on_llm_end(self, response, *, run_id, parent_run_id, **kwargs):
      if len(self.sources):
          sources_table = pd.DataFrame(self.sources[:3]).to_markdown()
          self.msg.elements.append(
            cl.Text(name="Sources", content=sources_table, display="inline")
          )

  # Stream the response from ChatGPT and show in real-time
  async with cl.Step(type="run", name="QA Assistant"):
    async for chunk in qa_rag_chain.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[
            cl.LangchainCallbackHandler(),
            PostMessageHandler(chatgpt_message)
        ]),
    ):
        await chatgpt_message.stream_token(chunk)
  await chatgpt_message.send()

Overwriting app.py


In [20]:
# !chainlit run app.py --port=8989 --watch &>./logs.txt & 

!chainlit run app.py --port=8989 --watch

2025-09-28 20:50:50 - Your app is available at http://localhost:8989
2025-09-28 20:50:53 - Translated markdown file for en-US not found. Defaulting to chainlit.md.
2025-09-28 20:50:59 - Message.__init__() got an unexpected keyword argument 'disable_feedback'
Traceback (most recent call last):
  File "/Users/azeez/miniforge3/lib/python3.10/site-packages/chainlit/utils.py", line 44, in wrapper
    return await user_function(**params_values)
  File "/Users/azeez/miniforge3/lib/python3.10/site-packages/chainlit/step.py", line 112, in async_wrapper
    result = await func(*args, **kwargs)
  File "/Users/azeez/Desktop/AI/Personal Projects/GitHub/RAG/app.py", line 58, in when_chat_starts
    msg = cl.Message(content=f"Processing files please wait...", disable_feedback=True)
TypeError: Message.__init__() got an unexpected keyword argument 'disable_feedback'
2025-09-28 20:50:59 - 2 changes detected
^C


In [11]:
! pip install -U chainlit
! python -c "import chainlit; print(chainlit.__version__)"

Collecting chainlit
  Using cached chainlit-2.8.1-py3-none-any.whl.metadata (8.1 kB)
Collecting asyncer<0.1.0,>=0.0.8 (from chainlit)
  Using cached asyncer-0.0.8-py3-none-any.whl.metadata (6.7 kB)
Collecting fastapi<0.117,>=0.116.1 (from chainlit)
  Using cached fastapi-0.116.2-py3-none-any.whl.metadata (28 kB)
Collecting literalai==0.1.201 (from chainlit)
  Using cached literalai-0.1.201.tar.gz (67 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting mcp<2.0.0,>=1.11.0 (from chainlit)
  Using cached mcp-1.15.0-py3-none-any.whl.metadata (80 kB)
Collecting pydantic-settings>=2.10.1 (from chainlit)
  Using cached pydantic_settings-2.11.0-py3-none-any.whl.metadata (3.4 kB)
Collecting python-multipart<1.0.0,>=0.0.18 (from chainlit)
  Using cached python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting starlette>=0.47.2 (from chainlit)
  Using cached starlette-0.48.0-py3-none-any.whl.metadata (6.3 kB)
Collecting uvicorn>=0.35.0 (from chainlit)
  Using cached uv

In [21]:
%%writefile chainlit.md

# Welcome to File QA RAG Chatbot 🤖

Please ask your question?

Overwriting chainlit.md


In [25]:
from pyngrok import ngrok
import yaml

# Terminate open tunnels if exist
ngrok.kill()

# Setting the authtoken
# Get your authtoken from `ngrok_credentials.yml` file
with open('ngrok_credentials.yml', 'r') as file:
    NGROK_AUTH_TOKEN = yaml.safe_load(file)
ngrok.set_auth_token(NGROK_AUTH_TOKEN['ngrok_key'])

# Open an HTTPs tunnel on port XXXX which you get from your `logs.txt` file
ngrok_tunnel = ngrok.connect(8989)
print("Chainlit App:", ngrok_tunnel.public_url)

Chainlit App: https://unodored-diatomaceous-latoya.ngrok-free.dev


t=2025-09-28T22:05:04+0300 lvl=warn msg="failed to open private leg" id=e7ada8ca0323 privaddr=localhost:8989 err="dial tcp 127.0.0.1:8989: connect: connection refused"
t=2025-09-28T22:05:05+0300 lvl=warn msg="failed to open private leg" id=b48a035452b7 privaddr=localhost:8989 err="dial tcp 127.0.0.1:8989: connect: connection refused"
t=2025-09-28T22:05:48+0300 lvl=warn msg="failed to open private leg" id=73e6d14476be privaddr=localhost:8989 err="dial tcp 127.0.0.1:8989: connect: connection refused"
t=2025-09-28T22:05:49+0300 lvl=warn msg="failed to open private leg" id=e38207be3808 privaddr=localhost:8989 err="dial tcp 127.0.0.1:8989: connect: connection refused"


In [28]:
ngrok.kill()

!ps -ef | grep app

In [30]:
!sudo kill -9 11975 

Password: