In [1]:
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

DATA_PATH = "./test"
DB_PATH = "./vectorstores/db/"

def create_vector_db():
    print(DATA_PATH)
    loader = PyPDFDirectoryLoader(DATA_PATH)
    documents = loader.load()

    print(f"Processed {len(documents)} pdf files")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)

    embedding_function = HuggingFaceEmbeddings(
        model_name='BAAI/bge-m3'
    )

    vectorstore = Chroma.from_documents(documents=texts, embedding=embedding_function, persist_directory=DB_PATH)
    # vectorstore.persist()

if __name__ == "__main__":
    create_vector_db()

./test
Processed 11 pdf files


2024-12-19 07:28:13.721307: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-19 07:28:13.727195: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1734593293.735615      88 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734593293.738082      88 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-19 07:28:13.747130: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [3]:
%%writefile app.py

import chainlit as cl

from langchain import hub
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain

QA_CHAIN_PROMPT = hub.pull('rlm/rag-prompt-mistral')

def load_llm():
    llm = Ollama(
        model="llama3",
        verbose=True,
        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])
    )
    return llm

def retrieval_qa_chain(llm, vectorstore):
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
        return_source_documents=True
    )
    return qa_chain

def qa_bot():
    llm = load_llm()
    DB_PATH="vectorstores/db/"
    embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-m3')
    vectorstore = Chroma(persist_directory=DB_PATH, embedding_function=embeddings)
    qa = retrieval_qa_chain(llm, vectorstore)
    return qa

# 새로운 채팅 세션이 시작될 때 호출되는 함수를 정의
@cl.on_chat_start
async def start():
    chain = qa_bot()
    msg = cl.Message(contents="Firing up the research into bot...")
    await msg.send()
    msg.content = "Hi, welcome to the research info bot. What is your query?"
    await msg.update()
    cl.user_session.set("chain", chain)

# 사용자가 보낸 메시지를 수신할 때 호출되는 함수
@cl.on_message
async def main(message):
    chain = cl.cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True,
        answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    cb.answer_reached=True

    res = await chain.acall(message.content, callbacks=[cb])
    print(f"response: {res}")

    answer = res["result"]
    answer = answer.replace(".", ".\n")
    sources = res["source_documents"]

    if sources:
        answer += f"\nSources: " + str(str(sources))
    else:
        answer += f"\nNo Sources found"

    await cl.Message(content=answer).send()

Overwriting app.py


In [4]:
!chainlit lint-translations

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



Linting ta.json...
⚠️ Extra key: 'components.molecules.detailsButton.took_one'
⚠️ Extra key: 'components.molecules.detailsButton.running'
⚠️ Extra key: 'components.molecules.detailsButton.took_other'
❌ Missing key: 'components.molecules.detailsButton.used'

Linting te.json...
⚠️ Extra key: 'components.molecules.detailsButton.took_one'
⚠️ Extra key: 'components.molecules.detailsButton.running'
⚠️ Extra key: 'components.molecules.detailsButton.took_other'
❌ Missing key: 'components.molecules.detailsButton.used'

Linting gu.json...
⚠️ Extra key: 'components.molecules.detailsButton.took_one'
⚠️ Extra key: 'components.molecules.detailsButton.running'
⚠️ Extra key: 'components.molecules.detailsButton.took_other'
❌ Missing key: 'components.molecules.detailsButton.used'

Linting mr.json...
⚠️ Extra key: 'components.molecules.detailsButton.took_one'
⚠️ Extra key: 'components.molecules.detailsButton.running'
⚠️ Extra key: 'components.molecules.detailsButton.took_other'
❌ Missing key: 'component