# Setup

### Import Dependencies

In [4]:
# Import required packages
import os
import pandas as pd
from getpass import getpass
import kdbai_client as kdbai
import time
import multiprocessing
# langchain packages
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import KDBAI
from langchain_openai import OpenAI
from langchain.chains.question_answering import load_qa_chain

### Start KDB.AI session, define table schema and chunk the data.

In [5]:
# Add OpenAI credentials.
# Reuse a key that is already present in the environment; otherwise prompt for it
# with getpass so the secret is never typed into (or saved with) the notebook.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

# Load the document that the LLM will be prompted about.
loader = TextLoader('./data/TorQ+Conf.txt')
doc = loader.load()

# Chunk the document into 200-character chunks (with a 50-character overlap)
# using langchain's text splitter "RecursiveCharacterTextSplitter".
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
# split_documents produces a list of all the chunks created.
chunks = text_splitter.split_documents(doc)

# Keep only the text of each chunk; these strings are what is later passed to add_texts.
pages = [chunk.page_content for chunk in chunks]

# OpenAI text embedding model handed to the vector store
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)

# Open a session against the kdb.ai instance at http://localhost:8082
session = kdbai.Session(
    endpoint="http://localhost:8082",
)

# Schema of the vector DB table that stores the embeddings:
#   id         - "str" column
#   text       - "bytes" column
#   embeddings - "float32" column carrying a flat L2 vector index with dims=1536
rag_schema = dict(
    columns=[
        dict(name="id", pytype="str"),
        dict(name="text", pytype="bytes"),
        dict(
            name="embeddings",
            pytype="float32",
            vectorIndex=dict(dims=1536, metric="L2", type="flat"),
        ),
    ]
)

### Store data in vector database. 

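To embed and add our texts we must first run `vecdb_kdbai.add_texts(texts=pages)` for a few seconds, before interrupting it and running `vecdb_kdbai.aadd_texts(texts=pages)`. The second cell below automates this: it runs `add_texts` in a separate process, terminates that process if it is still alive after 5 seconds, and then calls `aadd_texts`.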

In [None]:
# Create the table for the embeddings, or reuse it when an earlier run already
# created it, so that re-running this cell does not fail on an existing table.
if "rag_langchain" in session.list():
    table = session.table("rag_langchain")
else:
    table = session.create_table("rag_langchain", rag_schema)

In [None]:
# Start from a clean slate: try to drop any existing "rag_langchain" table.
try:
    session.table("rag_langchain").drop()
except kdbai.KDBAIException:
    # The drop raised a KDBAIException; ignore it and go straight to creating the table.
    pass
else:
    # The drop went through: pause for 5 seconds before re-creating the table.
    time.sleep(5)
table = session.create_table("rag_langchain", rag_schema)
# Use KDBAI as the vector store, backed by the new table and the OpenAI embeddings
vecdb_kdbai = KDBAI(table, embeddings)
# Worker executed in a separate process: adds the chunked texts to the vector store
def foo(n):
    """Call `vecdb_kdbai.add_texts` with the module-level `pages` list.

    `n` is accepted (the process below passes 5) but is not used by the body.
    """
    vecdb_kdbai.add_texts(texts=pages)

if __name__ == '__main__':
    # Start foo as a separate process named "Foo"
    p = multiprocessing.Process(target=foo, name="Foo", args=(5,))
    p.start()
# NOTE(review): `p` is only bound inside the guard above, yet everything below uses it;
# this relies on __name__ being '__main__' when the cell runs.
# Wait for at most 5 seconds for foo to finish
# Usage: join([timeout in seconds])
p.join(5)

# If the process is still running after the timeout
if p.is_alive():
    print("Kill the add_texts Function")

    # Terminate foo and wait for the process to exit
    p.terminate()
    p.join()
# NOTE(review): presumably `aadd_texts` is the async variant of `add_texts`; if it is a
# coroutine function, this call only creates a coroutine and never awaits it - confirm.
vecdb_kdbai.aadd_texts(texts=pages)

### Create chat store 'messages' to record the conversation.

In [6]:
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)
# Chat store: the conversation opens with the two system prompts below.
messages = [
    SystemMessage(content=system_prompt)
    for system_prompt in (
        "You are a TorQ Expert Smart Support bot called 'TESS'.",
        "Interpret the following acronyms when answering questions. RDB is an acronym for real time database, WDB is an acronym for write database, CTP is an acronym for chained tickerplant, STP is an acronym for segmented tickerplant, TP is an acronym for tickerplant, and HDB is an acronym for historical database.",
    )
]

# Value passed as `k` in the retriever's search_kwargs
K = 10

### Define function to contextualise prompts with the previous conversation.

In [7]:
def reset_history():
    """Rebind the global `messages` to a fresh list holding only the two system prompts."""
    global messages
    system_prompts = (
        "You are a TorQ Expert Smart Support bot called 'TESS'.",
        "Interpret the following acronyms when answering questions. RDB is an acronym for real time database, WDB is an acronym for write database, CTP is an acronym for chained tickerplant, STP is an acronym for segmented tickerplant, TP is an acronym for tickerplant, and HDB is an acronym for historical database.",
    )
    messages = [SystemMessage(content=prompt) for prompt in system_prompts]
    
# Chat model (temperature 0.0) that contextualise_query invokes to rephrase follow-up questions
chat = ChatOpenAI(
    model='gpt-3.5-turbo-16k',
    temperature=0.0,
)

def rel_info(msgs):
    """Select the part of the chat history that is embedded in the rephrasing prompt.

    Args:
        msgs: Sequence of chat messages.

    Returns:
        `msgs` itself when it holds three messages or fewer. Otherwise a new flat
        list made of the first two messages (the system prompts in this notebook)
        followed by the last message.
    """
    if len(msgs) > 3:
        # Build one flat list so both branches return the same shape; previously
        # this branch returned a (list, message) tuple.
        return [*msgs[:2], msgs[-1]]
    return msgs

def contextualise_query(query: str):
    """Ask the chat model to rewrite `query` as a standalone question.

    The prompt embeds `rel_info(messages)` as the chat history together with the
    follow-up question, and instructs the model to expand the RDB/HDB/WDB/STP/CTP
    acronyms, to mention "in TorQ" and to finish with 'Explain fully.'.

    Args:
        query: The user's follow-up question.

    Returns:
        Whatever `chat.invoke` returns for the prompt; the caller reads its
        `.content` attribute to get the rewritten question.
    """
    # NOTE(review): the first line of the prompt reads like an unfinished sentence - confirm the wording.
    contextualize_q_system_prompt = f"""If it seems like a user asks about something related to the Chat History 
    Given the following conversation and a follow up question, 
    REPHRASE ONLY the follow up question to be a clear concise question that can be understood without context
    based on the conversation. 
    Rewrite 'RDB' with 'real time database', 
    Rewrite 'HDB' with 'historical database', 
    Rewrite 'WDB' with 'write database', 
    Rewrite 'STP' with 'segmented tickerplant', 
    Rewrite 'CTP' with 'chained tickerplant'.
    If "in TorQ" is not mentioned in the question, mention it at the end. 
    After the question write 'Explain fully.'.
    
    Chat History:
    {rel_info(messages)}

    Follow up question:
    {query}"""
    return chat.invoke(contextualize_q_system_prompt)
    
# After the question write 'Explain fully.'.

### Define TESS.

In [8]:
# Value passed as `k` in the retriever's search_kwargs
K = 10

# Retrieval QA chain (chain_type 'stuff') that reads from the KDB.AI vector store
# and also returns the source documents alongside the result.
qabot = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model='gpt-3.5-turbo-16k', temperature=0.0),
    chain_type='stuff',
    retriever=vecdb_kdbai.as_retriever(search_kwargs={"k": K}),
    return_source_documents=True,
)
def TESS(query):
    """Answer `query` with the retrieval QA chain and record the exchange in `messages`.

    The question is first rewritten by `contextualise_query`. The rewritten question
    is appended to the chat history and printed, then the chain's 'result' is
    appended to the chat history and printed as well. Nothing is returned.
    """
    standalone_question = contextualise_query(query).content
    messages.append(HumanMessage(content=standalone_question))
    print(f'\n\n{standalone_question}\n')
    response = qabot.invoke({'query': standalone_question})
    messages.append(AIMessage(content=response['result']))
    print(messages[-1].content)

# Time to run some queries.

In [21]:
# Put a first question to TESS; the rewritten question and the answer are printed below
query = "How do I restart a process?"
TESS(query)



How do I restart a process in TorQ? Explain fully.

To restart a process in TorQ, you need to follow these steps:

1. Identify the process you want to restart. This can be done by checking the list of available processes in the `.servers.SERVERS` table. Each process has a unique `proctype` and `procname` associated with it.

2. Open a command prompt or terminal and navigate to the TorQ installation directory.

3. Run the following command to restart the process:
   ```
   q torq.q -load code/processes/<process_name>.q -p <port_number> -proctype <process_type> -procname <process_name>
   ```
   Replace `<process_name>` with the name of the process you want to restart, `<port_number>` with the port number the process is running on, `<process_type>` with the type of the process, and `<process_name>` with the name of the process.

   For example, if you want to restart the `reporter1` process running on port `20004`, the command would be:
   ```
   q torq.q -load code/processes/reporter.

In [19]:
# Reset the chat history back to the two system prompts
reset_history()

In [9]:
# Display the current chat history
messages

[SystemMessage(content="You are a TorQ Expert Smart Support bot called 'TESS'."),
 SystemMessage(content='Interpret the following acronyms when answering questions. RDB is an acronym for real time database, WDB is an acronym for write database, CTP is an acronym for chained tickerplant, STP is an acronym for segmented tickerplant, TP is an acronym for tickerplant, and HDB is an acronym for historical database.')]

In [37]:
# Inspect what the session lists (presumably the tables on the KDB.AI instance - confirm)
session.list()

[]

In [41]:
# Clean up: drop the "rag_langchain" table
session.table('rag_langchain').drop()

True