# Playground for Chatbot Example App

## Python Imports

In [1]:
!pip install openai==0.28
import os
import uuid

from time import time

from redisvl.llmcache.semantic import SemanticCache
from redisvl.vectorize.text import OpenAITextVectorizer

# we need those to fine tune vector dimension for openAI which is larger than default
import redis as redisclient
from redis.commands.search.field import VectorField
from redisvl.index import SearchIndex

from redisvl.vectorize.text import OpenAITextVectorizer
import tiktoken #required for OpenAI

from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.memory import ConversationBufferMemory
from langchain.memory.chat_message_histories import RedisChatMessageHistory
from langchain.embeddings import OpenAIEmbeddings
from langchain.callbacks.base import BaseCallbackHandler
from langchain.vectorstores import Redis
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.agents import AgentType
from langchain.agents import initialize_agent
from dotenv import load_dotenv
import gradio as gr



## Apply Config

In [2]:
load_dotenv()

class AppConfig:
    DOCS_FOLDER=os.environ["DOCS_FOLDER"]
    REDIS_URL=os.environ["LOCAL_REDIS_URL"]
    OPENAI_API_KEY=os.environ["OPENAI_API_KEY"]
    OPENAI_EMBEDDING_MODEL=os.environ["OPENAI_EMBEDDING_MODEL"]
    OPENAI_CHAT_MODEL=os.environ["OPENAI_CHAT_MODEL"]
    CHUNK_SIZE=int(os.getenv("CHUNK_SIZE", 500))
    CHUNK_OVERLAP=int(os.getenv("CHUNK_OVERLAP", 10))
    PAGE_TITLE=os.getenv("PAGE_TITLE", "📃 Chat Your PDF")
    PAGE_ICON=os.getenv("PAGE_ICON", "📃")
    RETRIEVE_TOP_K=int(os.getenv("RETRIEVE_TOP_K", 5))
    LLMCACHE_THRESHOLD=float(os.getenv("LLMCACHE_THRESHOLD", 0.95))
    
config = AppConfig()

## Extract Text from Documents and View Results

In [4]:
path = config.DOCS_FOLDER
docs = []
for file in os.listdir(path):
    print(file, flush=True)
    loader = PyPDFLoader(os.path.join(path, file))
    docs.extend(loader.load())
    
#Show extracted pages
def pagenumber_change(n):
    page = docs[int(n)]
    return page.page_content,page.metadata

with gr.Blocks() as demopage:
    gr.Markdown("### Pages Content")
    page = gr.TextArea(label="Page Content",max_lines=10,value=docs[0].page_content)
    meta_data = gr.Textbox(label="MetaData",value=docs[0].metadata)
    page_number = gr.Slider(label="Page Number",  minimum=0, maximum= len(docs)-1, value=0,step=1, scale=1)
    page_number.release(pagenumber_change, inputs=[page_number], outputs=[page,meta_data])
        
demopage.launch(share=False)

print(docs[1])
print(type(docs[1]))
print(type(docs[1].metadata))

The BMW i4 - Brochure.pdf
Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


page_content='MODELSHIGHLIGHTS  \nJOURNEY EQUIPMENTTECHNICAL VALUES AND SERVICES\nEquipment\nExterior colour world Interior colour worldM Aerodynamic KitWheels and tyresOriginal BMW AccessoriesT echnical valuesBMW Services Highlights  \n• Exterior and interior design • Driving dynamics • Charging • Connectivity and infotainment • Assistance systems \nCONTENTSBMW i4 eDrive40: \n19" aerodynamic wheels 855 bicolour with mixed tyres, exterior colour in Mineral White metallic, sport seats in \'Vernasca\' leather Oyster decorative stitching| Black, interior trim finisher in fine-wood trim Oak grain open-pored with Pearl Chrome accent strip.THE FIRST -EVER FULL Y-ELECTRIC BMW i4\nDISCOVER MORE.\nScan the code to get more information \nand more driving pleasure on the first-ever fully-electric  BMW i4.\n' metadata={'source': 'pdfs/The BMW i4 - Brochure.pdf', 'page': 1}
<class 'langchain_core.documents.base.Document'>
<class 'dict'>


## Split Documents and View Results

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=config.CHUNK_SIZE, chunk_overlap=config.CHUNK_OVERLAP
)
splits = text_splitter.split_documents(docs)

#Show splits with overlaps
def docnumber_change(n):
    doc = splits[int(n)]
    return doc.page_content,doc.metadata

with gr.Blocks() as demodoc:
    gr.Markdown("### View Split Documents")
    doc_content = gr.TextArea(label="Document Content",max_lines=10,value=splits[0].page_content)
    doc_meta_data = gr.Textbox(label="MetaData",value=splits[0].metadata)
    doc_number = gr.Slider(label="Document Number",  minimum=0, maximum= len(splits)-1, value=0,step=1, scale=1)
    doc_number.release(docnumber_change, inputs=[doc_number], outputs=[doc_content,doc_meta_data])
        
demodoc.launch(share=False)

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




## Create Embeddings and Store in VectorDB

In [6]:
def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

embeddings = OpenAIEmbeddings(
    openai_api_key=config.OPENAI_API_KEY,
    model=config.OPENAI_EMBEDDING_MODEL
)


# Check if not already vectorized (currently at path level, not at path/file level)
embeddingsDone = redisclient.Redis.from_url(config.REDIS_URL)
embeddingsDoneForDoc = embeddingsDone.sismember("doc:chatbot:path", path)
if not embeddingsDoneForDoc:
    # Azure OpenAI limit inputs at 16 for now
    vectordb = None
    for splitN in chunker(splits, 16):
        if vectordb == None:
            vectordb = Redis.from_documents(
                splitN, embeddings, redis_url=config.REDIS_URL, index_name="chatbot"
            )
        else:
            #foo = True
            vectordb.add_documents(splitN)
    embeddingsDone.sadd("doc:chatbot:path", path)
else:
    print("Found existing embeddings in doc:chatbot:path for "+ path, flush=True)
    schema = {
        'text': [
            {'name': 'source', 
             'weight': 1, 
             'no_stem': False, 
             'withsuffixtrie': False, 
             'no_index': False, 
             'sortable': False}, 
            {'name': 'content', 
             'weight': 1, 
             'no_stem': False, 
             'withsuffixtrie': False, 
             'no_index': False, 
             'sortable': False}
        ], 
        'numeric': [
            {'name': 'page', 
             'no_index': False, 
             'sortable': False}
        ], 
        'vector': [
            {'name': 'content_vector', 
             'dims': 1536, 'algorithm': 'FLAT', 
             'datatype': 'FLOAT32', 
             'distance_metric': 'COSINE', 
             'initial_cap': 20000, 
             'block_size': 1000}
        ]
    }
    vectordb = Redis.from_existing_index(
            embeddings,
            index_name="chatbot",
            redis_url=config.REDIS_URL,
            schema = schema
    )

print(vectordb.schema)

Found existing embeddings in doc:chatbot:path for pdfs/
{'text': [{'name': 'source', 'weight': 1.0, 'no_stem': False, 'withsuffixtrie': False, 'no_index': False, 'sortable': False}, {'name': 'content', 'weight': 1.0, 'no_stem': False, 'withsuffixtrie': False, 'no_index': False, 'sortable': False}], 'numeric': [{'name': 'page', 'no_index': False, 'sortable': False}], 'vector': [{'name': 'content_vector', 'dims': 1536, 'algorithm': 'FLAT', 'datatype': 'FLOAT32', 'distance_metric': 'COSINE', 'initial_cap': 20000, 'block_size': 1000}]}


  warn_deprecated(


## Define Retriever

In [7]:
retriever = vectordb.as_retriever(search_kwargs={"k": config.RETRIEVE_TOP_K})
tool = create_retriever_tool(retriever, "search_bmw_i4",
       "Searches and returns snippets from the BMW i4 brochure.")
tools = list()
tools.append(tool)
#Show the returned tool
print(tool)

name='search_bmw_i4' description='Searches and returns snippets from the BMW i4 brochure.' args_schema=<class 'langchain.tools.retriever.RetrieverInput'> func=<bound method BaseRetriever.get_relevant_documents of RedisVectorStoreRetriever(tags=['Redis', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.redis.base.Redis object at 0x2adbd0450>, search_kwargs={'k': 5})> coroutine=<bound method BaseRetriever.aget_relevant_documents of RedisVectorStoreRetriever(tags=['Redis', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.redis.base.Redis object at 0x2adbd0450>, search_kwargs={'k': 5})>


## Set up the Redis LLMCache Built with OpenAI Text Embeddings

In [8]:
llmcache_embeddings = OpenAITextVectorizer(
    api_config={'api_key': config.OPENAI_API_KEY},
    model=config.OPENAI_EMBEDDING_MODEL
)

schema = {
    "index": {
        "name": "cache",
        "prefix": "llmcache",
    },
    "fields": {
        "vector": [{
                "name": "prompt_vector",
                "dims": 1536,
                "distance_metric": "cosine",
                "algorithm": "flat",
                "datatype": "float32"}
        ]
    },
}

cache = SearchIndex.from_dict(schema)
cache.connect(config.REDIS_URL)
cache.create(overwrite=True)

llmcache = SemanticCache(
    redis_url=config.REDIS_URL,
    threshold=config.LLMCACHE_THRESHOLD, # semantic similarity threshold
    vectorizer=llmcache_embeddings,
    index=cache
)
print(llmcache.index.info())

{'index_name': 'cache', 'index_options': [], 'index_definition': ['key_type', 'HASH', 'prefixes', ['llmcache'], 'default_score', '1'], 'attributes': [['identifier', 'prompt_vector', 'attribute', 'prompt_vector', 'type', 'VECTOR']], 'num_docs': '0', 'max_doc_id': '0', 'num_terms': '0', 'num_records': '0', 'inverted_sz_mb': '0', 'vector_index_sz_mb': '0.15383148193359375', 'total_inverted_index_blocks': '2836', 'offset_vectors_sz_mb': '0', 'doc_table_size_mb': '0', 'sortable_values_size_mb': '0', 'key_table_size_mb': '0', 'geoshapes_sz_mb': '0', 'records_per_doc_avg': 'nan', 'bytes_per_record_avg': 'nan', 'offsets_per_term_avg': 'nan', 'offset_bits_per_record_avg': 'nan', 'hash_indexing_failures': '0', 'total_indexing_time': '0', 'indexing': '1', 'percent_indexed': '0.575', 'number_of_uses': 1, 'cleaning': 0, 'gc_stats': ['bytes_collected', '0', 'total_ms_run', '0', 'total_cycles', '0', 'average_cycle_time_ms', 'nan', 'last_run_time_ms', '0', 'gc_numeric_trees_missed', '0', 'gc_blocks_de

## Configure the Conversational Chat Agent that Can Use the Redis Vector DB for RAG

In [9]:
# Setup Redis memory for conversation history
msgs = RedisChatMessageHistory(
    session_id="my_session", url=config.REDIS_URL
)

memory = ConversationBufferMemory(
    memory_key="chat_history", chat_memory=msgs, return_messages=True
)
chatLLM = ChatOpenAI(
    model_name=config.OPENAI_CHAT_MODEL,
    streaming=True
)
PREFIX = """"You are a friendly AI assistant that can provide information on the BMW i4 based on the provided PDF manual. Users can ask questions of your manual! Only use information from the prompt. You should not make anything up."""

FORMAT_INSTRUCTIONS = """You have access to the following tools:

{tools}

Use the following format:

'''
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
'''

When you have gathered all the information required, respond to the user in a friendly manner.
"""

SUFFIX = """

Begin! Remember to give detailed, informative answers

Previous conversation history:
{chat_history}

New question: {input}
{agent_scratchpad}
"""
agent = initialize_agent(
    tools,
    chatLLM,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    verbose=False,
    memory=memory,
    agent_kwargs={
        'prefix': PREFIX,
        'format_instructions': FORMAT_INSTRUCTIONS,
        'suffix': SUFFIX
    }
)

  warn_deprecated(
  warn_deprecated(


## Generate a Response to the User's Question after Checking the Cache (if Enabled).

In [10]:
from langchain.globals import set_debug
set_debug(True)

def generate_response(
    use_cache: bool,
    llmcache: SemanticCache,
    user_query: str,
    agent
) -> str:
    t0 = time()
    if use_cache:
        if response := llmcache.check(user_query):
            print("Cache Response Time (secs)", time()-t0, flush=True)
            return response[0]
 
    retrieval_handler = BaseCallbackHandler()
    response = agent.run(input=user_query, callbacks=[retrieval_handler])
    print("Full Response Time (secs)", time()-t0, flush=True)
    if use_cache:
        llmcache.store(user_query, response)
    return response

## Use the Chatbot

In [11]:
from langchain.globals import set_verbose
set_verbose(True)

use_cache = False

def response(query):
    return generate_response(use_cache, llmcache, query, agent)

with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Submit** to see the response.")
    with gr.Row():
        inp = gr.Textbox(label="Prompt", placeholder="Ask me anything about the BMW i4!")
        out = gr.Textbox(label="LLM Response")
    btn = gr.Button("Submit")
    clear = gr.ClearButton([inp, out])
    btn.click(fn=response, inputs=inp, outputs=out)

demo.launch(show_api=False, share=False)

Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.




  warn_deprecated(


[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain > 3:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "System: Assistant is a large language model trained by OpenAI.\n\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n\nAssistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts 