In [1]:
!pip install llama-index llama-index-llms-huggingface llama-index-embeddings-huggingface transformers accelerate bitsandbytes llama-index-readers-web chainlit langchain-community python-dotenv unstructured torch nltk

Collecting llama-index
  Downloading llama_index-0.11.23-py3-none-any.whl.metadata (11 kB)
Collecting llama-index-llms-huggingface
  Downloading llama_index_llms_huggingface-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting llama-index-embeddings-huggingface
  Downloading llama_index_embeddings_huggingface-0.4.0-py3-none-any.whl.metadata (767 bytes)
Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting llama-index-readers-web
  Downloading llama_index_readers_web-0.3.0-py3-none-any.whl.metadata (1.2 kB)
Collecting chainlit
  Downloading chainlit-1.3.2-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.7-py3-none-any.whl.metadata (2.9 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting unstructured
  Downloading unstructured-0.16.5-py3-none-any.whl.metadata (24 kB)
Collecting llama-index-agent-openai<0.4.0,>=0.

In [2]:
!mkdir data
!wget "https://www.dropbox.com/scl/fi/52yxo196e134l58trhjfm/Brief-history-of-Vietnam.zip?rlkey=u0bsxff2w6y8y6bpn8whfea07&st=1aqde08r&dl=1" -O data/Brief-history-of-Vietnam.zip
!unzip data/Brief-history-of-Vietnam.zip -d data

--2024-11-18 10:42:37--  https://www.dropbox.com/scl/fi/52yxo196e134l58trhjfm/Brief-history-of-Vietnam.zip?rlkey=u0bsxff2w6y8y6bpn8whfea07&st=1aqde08r&dl=1
Resolving www.dropbox.com (www.dropbox.com)... 162.125.81.18, 2620:100:6030:18::a27d:5012
Connecting to www.dropbox.com (www.dropbox.com)|162.125.81.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://uce003333f9392ad02a4f90545cc.dl.dropboxusercontent.com/cd/0/inline/CenFO3O3jngRfZ3BzC_0S_75TL6jiJvQidw0HhDXnQpGj-0EnEi4PBZppR06uRaEFJiFktO6fzy7Fvrpe8K16l8NzmzG8oqed6jkEl3MLH5OKU0IjI_QZGauyJNFU6vCqy4DZuNyoEfyphWqIXbs0nza/file?dl=1# [following]
--2024-11-18 10:42:38--  https://uce003333f9392ad02a4f90545cc.dl.dropboxusercontent.com/cd/0/inline/CenFO3O3jngRfZ3BzC_0S_75TL6jiJvQidw0HhDXnQpGj-0EnEi4PBZppR06uRaEFJiFktO6fzy7Fvrpe8K16l8NzmzG8oqed6jkEl3MLH5OKU0IjI_QZGauyJNFU6vCqy4DZuNyoEfyphWqIXbs0nza/file?dl=1
Resolving uce003333f9392ad02a4f90545cc.dl.dropboxusercontent.com (uce003333f9392ad02a4f90545cc.dl.d

In [3]:
!pip install chainlit



I install Chainlit for chatbot interface and install pyngrok for tunelling it with a localhost server

In [5]:
!pip install chainlit
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Downloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.1


The code below is the one I create to build the Chainlit interface for my chatbot and save it to chainlit_app.py file


In [26]:
%%writefile chainlit_app.py
import chainlit as cl
import torch
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings, VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.readers.file import UnstructuredReader
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine
from pathlib import Path
import nltk

# Initialize your chatbot components
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('punkt_tab')

# Set up your LLM and embedding model
llm = HuggingFaceLLM(
    model_name="HuggingFaceH4/zephyr-7b-beta",
    tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
    query_wrapper_prompt=PromptTemplate("<|system|>\n</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"),
    context_window=2048,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
    device_map="auto",
)

Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Load documents and create indices
years = [1975, 1969, 1945]
loader = UnstructuredReader()
doc_set = {}
all_docs = []

for year in years:
    year_docs = loader.load_data(file=Path(f"./data/War/{year}.htm"), split_documents=False)
    for d in year_docs:
        d.metadata = {"year": year}
    doc_set[year] = year_docs
    all_docs.extend(year_docs)

# Create or load vector indices
index_set = {}
for year in years:
    storage_context = StorageContext.from_defaults()
    cur_index = VectorStoreIndex.from_documents(doc_set[year], storage_context=storage_context)
    index_set[year] = cur_index
    storage_context.persist(persist_dir=f"./storage/{year}")

# Create a query engine
query_engine_tools = [
    QueryEngineTool(
        query_engine=index_set[year].as_query_engine(),
        metadata=ToolMetadata(
            name=f"vector_index_{year}",
            description=f"useful for when you want to answer queries about the {year} War of Vietnam",
        ),
    )
    for year in years
]

sub_query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
    llm=HuggingFaceLLM(model_name="HuggingFaceH4/zephyr-7b-alpha"),
)

# Chainlit function for handling user messages
@cl.on_message
async def main(message: cl.Message):
    try:
        # Extract the text content from the message object
        user_input = message.content
        response = sub_query_engine.query(user_input)
        await cl.Message(content=str(response)).send()
    except Exception as e:
        error_message = f"Error processing your request: {e}"
        await cl.Message(content=error_message).send()
        print(f"Detailed Error: {e}")


Overwriting chainlit_app.py


This cell of code will fix a CUDA launch blocking after running the model

In [30]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


Run this cell below to start the Chainlit server in the background

In [39]:
!chainlit run chainlit_app.py --port 8000 --host 0.0.0.0







2024-11-18 12:26:13.773672: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-18 12:26:13.789723: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-18 12:26:13.811408: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-18 12:26:13.818176: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-18 12:26:13.834147: I tensorflow/core/platform/cpu_feature

After creating ngrok account on this link (https://dashboard.ngrok.com/get-started/setup/windows) you can get your Authtoken and you will need to authorize it using the below cell code

In [9]:
from pyngrok import ngrok
!ngrok authtoken 2p14MSBMnKxVGWCCZqJiwEjVnx2_2ZFZxWPxmMXU97n9NFoLm


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


After that run this final cell to generate a link for the chatbot server and open that link to run the chatbot

In [38]:
from pyngrok import ngrok

# List all active tunnels
tunnels = ngrok.get_tunnels()
print(tunnels)

# Close a specific tunnel by its public URL
# Replace with the actual public URL of the tunnel you want to close
# Example: ngrok.disconnect("https://your-tunnel-url.ngrok.io")

# Or close all active tunnels
for tunnel in tunnels:
    ngrok.disconnect(tunnel.public_url)
    print(f"Disconnected tunnel: {tunnel.public_url}")

# Now try creating a new tunnel
public_url = ngrok.connect(8000)
print(f"Chainlit app is accessible at: {public_url}")

[]
Chainlit app is accessible at: NgrokTunnel: "https://9326-34-143-144-20.ngrok-free.app" -> "http://localhost:8000"


After all this step I can create Chainlit UI for my chatbot. However, it still have some error with CUDA for the free version of chatbot