## **1. Install and import libraries**

In [2]:
%pip install -q transformers==4.41.2
%pip install -q bitsandbytes==0.43.1
%pip install -q accelerate==0.31.0
%pip install -q langchain==0.2.5
%pip install -q langchainhub==0.1.20
%pip install -q langchain-chroma==0.1.1
%pip install -q langchain-community==0.2.5
%pip install -q langchain-openai==0.1.9
%pip install -q langchain_huggingface==0.0.3
%pip install -q chainlit==1.1.304
%pip install -q python-dotenv==1.0.1
%pip install -q pypdf==4.2.0
%npm install -g localtunnel
%pip install -q numpy==1.24.4

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m33.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.6/974.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.4/337.4 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.5/127.5 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.1/141.1 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m559.5/559.5 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━

In [3]:
import chainlit as cl
import torch

from chainlit.types import AskFileResponse

from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from langchain_huggingface.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.chains import ConversationalRetrievalChain
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain import hub

# Run with tunnel

## **2. Initialize text splitter and embedding**

In [None]:
# Chunking parameters, in characters: each chunk holds up to CHUNK_SIZE
# characters and shares CHUNK_OVERLAP characters with its neighbour.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# No model name is given, so the library's default embedding model is used.
embedding = HuggingFaceEmbeddings()

## **3. Create a function to load and split a PDF or text file**

In [5]:
def process_file(file: AskFileResponse):
  """Load an uploaded file and split it into chunks.

  Args:
    file: The uploaded file. Its ``type`` (MIME type) selects the loader and
      its ``path`` is the location read by that loader.

  Returns:
    The list of chunks produced by ``text_splitter``; each chunk's
    ``metadata["source"]`` is set to ``"source_<index>"``.

  Raises:
    ValueError: If ``file.type`` is neither ``text/plain`` nor
      ``application/pdf``.
  """
  if file.type == "text/plain":
    Loader = TextLoader
  elif file.type == "application/pdf":
    Loader = PyPDFLoader
  else:
    # Fail with a clear message instead of an unbound-variable error later on.
    raise ValueError(f"Unsupported file type: {file.type!r}")

  # Loading and splitting must run for BOTH supported types, not only for PDFs.
  loader = Loader(file.path)
  documents = loader.load()
  docs = text_splitter.split_documents(documents)

  # Give every chunk a stable, human-readable source label.
  for i, doc in enumerate(docs):
    doc.metadata["source"] = f"source_{i}"
  return docs

## **4. Create a function to get vector database**

In [6]:
def get_vector_db(file: AskFileResponse):
  """Build a Chroma vector store from the chunks of an uploaded file.

  The chunks returned by ``process_file`` are also stored in the Chainlit
  user session under the key ``"docs"``.

  Args:
    file: The uploaded file, passed through to ``process_file``.

  Returns:
    The Chroma store created from the chunks with the module-level
    ``embedding``.
  """
  chunks = process_file(file)
  cl.user_session.set("docs", chunks)
  return Chroma.from_documents(documents=chunks, embedding=embedding)

## **5. Create a function to get LLMs (Vicuna)**

In [None]:
def get_huggingface_llm(model_name: str = "lmsys/vicuna-7b-v1.5", max_new_token: int = 512):
    """Load a 4-bit quantized causal LM and wrap it as a LangChain LLM.

    Args:
        model_name: Hugging Face model id passed to ``from_pretrained`` for
            both the model and the tokenizer.
        max_new_token: Forwarded to the text-generation pipeline as
            ``max_new_tokens``.

    Returns:
        A ``HuggingFacePipeline`` wrapping the transformers text-generation
        pipeline, so it can be passed as ``llm`` to LangChain chains. The raw
        transformers pipeline remains reachable via its ``pipeline`` attribute.
    """
    # Use the documented config object rather than a bare dict.
    nf4_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=nf4_config,
        low_cpu_mem_usage=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_token,
        # Reuse the EOS token id as the padding token id for generation.
        pad_token_id=tokenizer.eos_token_id,
    )

    # LangChain chains expect a LangChain LLM, not a raw transformers pipeline.
    return HuggingFacePipeline(pipeline=model_pipeline)

# Build the model once at module level; on_chat_start passes this shared
# instance to the retrieval chain it creates.
LLM = get_huggingface_llm()


## **6. Build Chainlit**

In [8]:
# Greeting and usage instructions for the app.
welcome_message = (
    "Welcome to the PDF QA! To get started:\n"
    "1. Upload a PDF or text file\n"
    "2. Ask a question about the file\n"
)

In [9]:
@cl.on_chat_start
async def on_chat_start():
    """Set up a chat session: ask for a file, index it, and build the QA chain.

    The resulting ``ConversationalRetrievalChain`` is stored in the Chainlit
    user session under the key ``"chain"``.
    """
    files = None
    # Keep prompting until the upload prompt yields a result
    # (presumably it yields None when the timeout elapses — confirm).
    while files is None:
        files = await cl.AskFileMessage(
            # Show the real instructions, not a placeholder literal.
            content=welcome_message,
            accept=["text/plain", "application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]
    msg = cl.Message(content=f"Processing '{file.name}'...", disable_feedback=True)
    await msg.send()

    # get_vector_db is synchronous; wrap it so the handler can await it.
    vector_db = await cl.make_async(get_vector_db)(file)

    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        # The chain is built with return_source_documents=True, so the memory
        # is told explicitly which output key to record.
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # "your_search_type" was a template placeholder, not a valid search type.
    # Use plain similarity search and retrieve the top 3 chunks.
    retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={'k': 3})

    chain = ConversationalRetrievalChain.from_llm(
        llm=LLM,
        chain_type="stuff",
        retriever=retriever,
        memory=memory,
        return_source_documents=True
    )

    msg.content = f"'{file.name}' processed. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)


In [10]:
@cl.on_message
async def on_message(message: cl.Message):
    """Answer a user message with the session's QA chain and list its sources.

    Args:
        message: The incoming Chainlit message; its ``content`` is the question.
    """
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler()

    # `invoke` is synchronous and returns a dict, which cannot be awaited;
    # `ainvoke` is the awaitable variant. Callbacks are passed through the
    # runnable config.
    res = await chain.ainvoke(message.content, config={"callbacks": [cb]})
    answer = res["answer"]
    source_documents = res.get("source_documents") or []

    # One Text element per retrieved chunk, named by its position in the results.
    text_elements = [
        cl.Text(content=source_doc.page_content, name=f"source_{source_idx}")
        for source_idx, source_doc in enumerate(source_documents)
    ]

    if text_elements:
        source_names = [text_el.name for text_el in text_elements]
        answer += f"\nSources: {', '.join(source_names)}"
    else:
        # Previously unreachable: now reported whenever nothing was retrieved.
        answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()


## **7. Host app**

### Tunnel

In [11]:
# `import urllib` alone does not guarantee the `urllib.request` submodule is loaded.
import urllib.request

# Look up this machine's public IPv4 address and print it as the
# password / endpoint IP for localtunnel.
with urllib.request.urlopen('https://ipv4.icanhazip.com', timeout=10) as response:
    public_ip = response.read().decode('utf8').strip("\n")

print("Password/EnpointIPforlocaltunnelis:", public_ip)

Password/EnpointIPforlocaltunnelis: 34.75.195.220


In [12]:
!chainlit run app.py --host 0.0.0.0 --port 8000 &>/content/logs.txt

In [None]:
# Expose local port 8000 (the port the Chainlit server was started on)
# through localtunnel, requesting the given subdomain.
!lt --port 8000 --subdomain aivn-simple-rag

# Run with Ngrok

In [1]:
# %%writefile app.py

Writing app.py


### Ngrok

In [3]:
# NOTE(review): unlike the other dependencies in this notebook, pyngrok is not
# version-pinned — consider pinning it for reproducibility.
%pip install pyngrok -q

In [18]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never paste the token into the notebook: read it from the environment, or
# prompt for it without echoing. (The previous unquoted `<your-ngrok-token>`
# placeholder is also parsed by the shell as input/output redirections.)
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_auth_token)

# Open a tunnel to local port 8000 — presumably the port the Chainlit app
# listens on by default; confirm if the app is started with another port.
public_url = ngrok.connect(8000).public_url

print(public_url)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
https://d8a9-35-203-177-136.ngrok-free.app


In [19]:
# Start the Chainlit app from app.py in the foreground; no host/port is given,
# so the ngrok tunnel above assumes the app ends up on port 8000 — TODO confirm.
!chainlit run app.py

2024-07-04 11:27:00.105841: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-04 11:27:00.105887: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-04 11:27:00.113027: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered

`from langchain_community.llms import HuggingFacePipeline`.

To install langchain-community run `pip install -U langchain-community`.
2024-07-04 11:27:06 - Use pytorch device_name: cuda
2024-07-04 11:27:06 - Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
Loading checkpoint shards: 100% 2/2 [01:01<00:00, 30.86s/it]
  warn_deprecated(

