In [2]:
import os
import requests
import getpass

from dotenv import load_dotenv

import weaviate

from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate
from langchain.llms import OpenAI, FakeListLLM
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationTokenBufferMemory, ConversationSummaryBufferMemory

In [5]:
# Load variables from the .env file under ../app/api into the process environment.
load_dotenv('../app/api/.env')

# Read the three credentials in one pass; each is None when the variable is unset.
OPENAI_API_KEY, WEAVIATE_API_KEY, WEAVIATE_URL = map(
    os.getenv,
    ('OPENAI_API_KEY', 'WEAVIATE_API_KEY', 'WEAVIATE_URL'),
)

In [None]:
# Weaviate client authenticated with the API key read from the environment.
client = weaviate.Client(
    auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
    url=WEAVIATE_URL,
)

# Embedding model handed to the vector store below.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# LangChain wrapper over the 'RGDocs' class: 'text' is the text key, and
# doc_title / doc_url are requested as extra attributes alongside each hit.
db = Weaviate(
    client,
    'RGDocs',
    'text',
    embeddings,
    attributes=['doc_title', 'doc_url'],
    by_text=False,
)

In [None]:
# Fetch the RGDocs objects whose doc_url equals the given URL.
# NOTE(review): this URL ends in "qwe", unlike the one used by the other
# cells - presumably a deliberate no-match probe; confirm.
(
    client.query
    .get('RGDocs', ['text', 'doc_title', 'doc_url'])
    .with_where({
        'path': ['doc_url'],
        'operator': 'Equal',
        'valueString': "https://revenuegrid.com/ssi/docs/kb/articles/Privacy-and-Securityqwe",
    })
    .do()
)['data']['Get']['RGDocs']

In [None]:
# Batch delete of the RGDocs objects stored for one page URL.
# dry_run=True is passed, so this is only a rehearsal; output='verbose'
# asks for the detailed report.
client.batch.delete_objects(
    'RGDocs',
    {
        'path': ['doc_url'],
        'operator': 'Equal',
        'valueString': "https://revenuegrid.com/ssi/docs/kb/articles/Privacy-and-Security",
    },
    dry_run=True,
    output='verbose',
)

In [None]:
# POST one documentation page URL to the local API's /pages/ endpoint.
# The loopback address is used as the destination: 0.0.0.0 is a bind-all
# address that is not a valid destination on every platform, and the other
# request cells in this notebook already target 127.0.0.1 / localhost.
url = 'http://127.0.0.1:8000/pages/'
params = {
    'doc_url': 'https://revenuegrid.com/ssi/docs/kb/articles/Privacy-and-Security'
}
headers = {
    'accept': 'application/json'
}

response = requests.post(url, params=params, headers=headers)

# Print the decoded JSON body of the response
print(response.json())

In [None]:
# DELETE request against the local API for one page URL, flagged dry_run.
# `headers` is reused from an earlier request cell.
params = {
    'doc_url': 'https://revenuegrid.com/ssi/docs/kb/articles/Privacy-and-Security',
    'dry_run': True,
}
r = requests.delete(
    'http://127.0.0.1:8000/pages/delete',
    headers=headers,
    params=params,
)
r.json()

In [14]:
# Question sent to the chat endpoint; `query` is reused by later cells.
query = 'What do you know about Revenue Grid?'

headers = {'accept': 'application/json'}

# JSON payload: the user message plus a one-pair placeholder history.
data = {'user_message': query, 'history': [('qwe', 'rty')]}

r = requests.post('http://localhost:8000/chat/', headers=headers, json=data)
r.json()

{'llm_answer': 'fake llm response',
 'sources': [{'doc_title': 'Salesforce integration Knowledge Base',
   'doc_url': 'https://docs.revenuegrid.com/'},
  {'doc_title': 'Release notes',
   'doc_url': 'https://docs.revenuegrid.com/articles/release-notes-intelligence'},
  {'doc_title': 'How to open Revenue Grid',
   'doc_url': 'https://docs.revenuegrid.com/articles/Setup'},
  {'doc_title': 'Integration with Salesforce Customer / Partner Community',
   'doc_url': 'https://docs.revenuegrid.com/ri/fast/articles/Partner-Community-Integration'}]}

In [None]:
# Text that follows the first 'do ' in the query.
# str.partition is used instead of find() + slicing: find() returns -1 when the
# marker is missing, which made the old slice silently start at index 2.
# With partition, a missing marker yields an empty string.
query.partition('do ')[2]

In [None]:
# Retrieve documents similar to `query` from the vector store (by_text=False,
# the same setting the store was constructed with). `docs` is read later by
# process_query as its context.
docs = db.similarity_search(query, by_text=False)
docs

In [None]:
# Metadata attached to the first returned document.
docs[0].metadata

In [None]:
# Print the first document's text, then display its title and URL as a tuple.
print(docs[0].page_content)
docs[0].metadata['doc_title'], docs[0].metadata['doc_url']

In [6]:
# OpenAI-backed LLM, constructed with cache=False.
llm = OpenAI(openai_api_key=OPENAI_API_KEY, cache=False)

In [None]:
import tiktoken
from langchain.llms import BaseLLM
from langchain.schema import Generation, LLMResult


class DummyLLM(BaseLLM):
    """Offline stand-in for a real LLM.

    Every prompt gets the same fixed reply, so code that takes an LLM can be
    exercised without calling a remote model. Token counting uses the
    tiktoken encoding for 'gpt-3.5-turbo'.
    """

    # Shared tokenizer used by get_num_tokens.
    _tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')
    # Fixed reply returned for every prompt.
    _response_text = 'This is LLM Response'

    def predict(self, text, *, stop=None, **kwargs):
        """Return the fixed reply; `text`, `stop` and extra kwargs are ignored.

        `stop` and `**kwargs` are accepted so calls written against the base
        class signature do not fail.
        """
        return self._response_text

    def get_num_tokens(self, text):
        """Return the number of tokens the tokenizer produces for `text`."""
        return len(self._tokenizer.encode(text))

    def _canned_result(self, prompts):
        """Build an LLMResult holding one fixed-reply generation per prompt."""
        return LLMResult(
            generations=[[Generation(text=self._response_text)] for _ in prompts]
        )

    def _generate(
        self,
        prompts,
        stop = None,
        run_manager = None,
        **kwargs,
    ):
        """Run the LLM on the given prompts.

        Previously this returned None; it now returns an LLMResult so callers
        that go through the generate path get a usable result.
        """
        return self._canned_result(prompts)

    async def _agenerate(
        self,
        prompts,
        stop = None,
        run_manager = None,
        **kwargs,
    ):
        """Async variant of _generate; returns the same fixed LLMResult."""
        return self._canned_result(prompts)

    @property
    def _llm_type(self) -> str:
        """Identifier for this LLM type.

        Declared as a property so that reading `self._llm_type` yields a
        string rather than a bound method.
        """
        return "dummy"

In [57]:
# Rebind `llm` to the offline DummyLLM stub; later cells that read `llm` use the stub.
llm = DummyLLM()

In [64]:
# Conversation memory with max_token_limit=50.
# Presumably the passed llm is used for token counting - confirm against langchain.
memory = ConversationTokenBufferMemory(llm=llm, max_token_limit=50)

In [65]:
# Store one placeholder exchange: 'query' as the input, 'llm_answer' as the output.
memory.save_context({'input': 'query'}, {'output': 'llm_answer'})

In [66]:
# Display the message history held by the memory object.
memory.chat_memory

ChatMessageHistory(messages=[HumanMessage(content='query', additional_kwargs={}, example=False), AIMessage(content='llm_answer', additional_kwargs={}, example=False)])

In [None]:
from langchain.memory.chat_message_histories import ChatMessageHistory

# NOTE(review): this cell has no recorded execution and the call passes no
# arguments. Confirm that `from_obj` exists on ChatMessageHistory and what it
# expects - TODO verify before relying on this cell.
ChatMessageHistory.from_obj()

In [62]:
from copy import deepcopy

# Independent copy of the memory's dict form with the 'llm' entry removed,
# leaving the remaining settings and the chat messages for inspection.
d = deepcopy(memory.dict())
d.pop('llm')
d

{'chat_memory': {'messages': [{'content': 'query',
    'additional_kwargs': {},
    'example': False},
   {'content': 'llm_answer', 'additional_kwargs': {}, 'example': False}]},
 'output_key': None,
 'input_key': None,
 'return_messages': False,
 'human_prefix': 'Human',
 'ai_prefix': 'AI',
 'memory_key': 'history',
 'max_token_limit': 50}

In [63]:
# Try to rebuild the memory from its own dict form. The recorded run of this
# cell failed validation on `chat_memory` and `llm`, so the error is caught
# and printed instead of aborting a Restart & Run All.
# pydantic's ValidationError derives from ValueError, so no extra import is needed.
try:
    display(ConversationTokenBufferMemory.parse_obj(memory.dict()))
except ValueError as err:
    print(f'{type(err).__name__}: {err}')

ValidationError: 2 validation errors for ConversationTokenBufferMemory
chat_memory
  instance of BaseChatMessageHistory expected (type=type_error.arbitrary_type; expected_arbitrary_type=BaseChatMessageHistory)
llm
  Can't instantiate abstract class BaseLanguageModel with abstract methods agenerate_prompt, apredict, apredict_messages, generate_prompt, predict, predict_messages (type=type_error)

In [None]:
# System prompt placed at the start of every prompt sent to the LLM.
# Built from one string per line; each line ends with a newline.
SYSTEM_MESSAGE = (
    "You are an AI assistant for the Revenue Grid documentation.\n"
    "You are given a question and extracted parts of product documentation. Provide a conversational answer to the question using the pieces of information provided.\n"
    "If the question includes a request for code, provide a code block directly from the documentation.\n"
    'If you don\'t know the answer, just say "Hmm, I\'m not sure." Don\'t try to make up an answer.\n'
    "If the question is not about Revenue Grid, politely inform them that you are tuned to only answer questions about Revenue Grid.\n"
)

# Token budget for the documentation excerpts included in a prompt.
TOKEN_LIMIT = 1000
def process_query(query):
    """Answer `query` with the notebook-level `llm`, using `docs` as context.

    Reads the notebook globals `memory`, `docs`, `llm`, `SYSTEM_MESSAGE` and
    `TOKEN_LIMIT`.

    Args:
        query: The user's question as a string.

    Returns:
        The LLM answer, a separator line, and a markdown list of the sources
        (title and URL) of the documents that were included in the prompt.

    Side effects:
        Prints the history and the full prompt for debugging, and saves the
        question/answer pair into `memory`.
    """
    # Conversation so far, followed by the new question.
    history = memory.load_memory_variables({})['history'] + '\n' + f'Human: {query}'
    print(f'{history = }')

    # Retrieval is stubbed: the notebook-level `docs` are used as candidates.
    # Live lookup would be: db.similarity_search(history, by_text=False)
    # Keep the longest leading run of documents whose combined token count
    # fits the budget. Each document is tokenized once.
    relevant_docs = []
    used_tokens = 0
    for doc in docs:
        used_tokens += llm.get_num_tokens(doc.page_content)
        if used_tokens > TOKEN_LIMIT:
            break
        relevant_docs.append(doc)

    summaries = '\n'.join(doc.page_content for doc in relevant_docs)

    sources = 'Sources:\n' + '\n'.join(
        f"[{doc.metadata['doc_title']}]({doc.metadata['doc_url']})"
        for doc in relevant_docs
    )

    # The newline before 'Summaries:' keeps the header from being glued onto
    # the end of the user's question.
    prompt_to_llm = SYSTEM_MESSAGE + history + '\nSummaries:\n' + summaries
    print(f'{prompt_to_llm = }')
    llm_answer = llm.predict(prompt_to_llm)
    display_answer = llm_answer + '\n--------------\n' + sources

    # Record this turn so the next call sees it in the history.
    memory.save_context({'input': query}, {'output': llm_answer})

    return display_answer

In [None]:
# Run the pipeline once. The `=` specifier prints the repr of the result, so
# newlines in the answer show up as \n escapes.
print(f'{process_query(query) = }')

In [None]:
# Display the stored history text together with its token count from llm.get_num_tokens.
memory.load_memory_variables({})['history'], llm.get_num_tokens(memory.load_memory_variables({})['history'])

In [None]:
# Display the message history again, after process_query has saved a turn.
memory.chat_memory