In [2]:
import getpass

import weaviate

from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate
from langchain.llms import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationTokenBufferMemory, ConversationSummaryBufferMemory

In [3]:
from dotenv import load_dotenv
import os

load_dotenv('../.env')

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY')
WEAVIATE_URL = os.getenv('WEAVIATE_URL')

In [4]:
client = weaviate.Client(
    url=WEAVIATE_URL,
    auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
)

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
db = Weaviate(client, 'RGDocs', 'text', embeddings, by_text=False)
db._query_attrs = [p['name'] for p in client.schema.get('RGDocs')['properties']]

In [5]:
query = 'What do you know about Revenue Grid?'

In [6]:
docs = db.similarity_search(query, by_text=False)
docs

[Document(page_content="Have a question?\nYou are at the right place.\nRevenue Grid (RG) is a premium web-based guided selling platform.\nIt's Salesforce integration suite that includes efficient and immediate sales playbook automation for highly effective enterprise sales:\n1. Strengthening sales forecast\n2. Improving efficiency of carrying out routine Sales tasks\n3. Running sophisticated Email, SMS or Phone call type of nurturing/drip campaigns\n4. Supervising colleagues for better sales team coaching techniques\nFor technical support, please contact\nRG support team\n.\nKnowledge Base navigation\nRevenue Grid Knowledge Base is subdivided into three major parts, which correspond to offered products:\nProductivity Package & Activity Capture.\nThis part covers using RG Email Sidebar and RG Sync Engine: user and admin settings, user actions, configuration, fine-tuning, etc.\nEngagement Package.\nThis part covers using RG Sequences, communication automation solution: user and admin set

In [7]:
docs[0].metadata

{'doc_md_filepath': 'index.md',
 'doc_navigation_path': '',
 'doc_title': 'Salesforce integration Knowledge Base',
 'doc_url': 'https://docs.revenuegrid.com/',
 'section_token_length': 488,
 'short_site_name': 'Sfcc',
 'site_name': 'Revenue Grid Knowledge Base'}

In [None]:
print(docs[0].page_content)
docs[0].metadata['doc_title'], docs[0].metadata['doc_url']

In [47]:
llm = OpenAI(openai_api_key=OPENAI_API_KEY)

In [8]:
import tiktoken
from langchain.llms import BaseLLM

class DummyLLM(BaseLLM):
    _tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')

    def predict(self, text):
        return 'This is LLM Response'

    def get_num_tokens(self, text):
        return len(self._tokenizer.encode(text))

    def _generate(
        self,
        prompts,
        stop = None,
        run_manager = None,
        **kwargs,
    ):
        """Run the LLM on the given prompts."""

    async def _agenerate(
        self,
        prompts,
        stop = None,
        run_manager = None,
        **kwargs,
    ):
        """Run the LLM on the given prompts."""

    def _llm_type(self) -> str:
        return "Return type of llm."

In [9]:
llm = DummyLLM()
memory = ConversationTokenBufferMemory(llm=llm, max_token_limit=50)

In [10]:
SYSTEM_MESSAGE = \
"""You are an AI assistant for the Revenue Grid documentation.
You are given a question and extracted parts of product documentation. Provide a conversational answer to the question using the pieces of information provided.
If the question includes a request for code, provide a code block directly from the documentation.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about Revenue Grid, politely inform them that you are tuned to only answer questions about Revenue Grid.
"""
TOKEN_LIMIT = 1000
def process_query(query):
    global memory

    # memory.append(f'Question: {query}')
    history = memory.load_memory_variables({})['history'] + '\n' + f'Human: {query}'
    print(f'{history = }')
    # relevant_docs = db.similarity_search(history, by_text=False)
    relevant_docs = docs

    while sum([llm.get_num_tokens(doc.page_content) for doc in relevant_docs]) > TOKEN_LIMIT:
        relevant_docs = relevant_docs[:-1]

    summaries = [doc.page_content for doc in relevant_docs]
    summaries = '\n'.join(summaries)

    sources = 'Sources:\n' + '\n'.join([f"[{doc.metadata['doc_title']}]({doc.metadata['doc_url']})" for doc in relevant_docs])

    prompt_to_llm = SYSTEM_MESSAGE + history + 'Summaries:\n' + summaries
    print(f'{prompt_to_llm = }')
    llm_answer = llm.predict(prompt_to_llm)
    display_answer = llm_answer + '\n--------------\n' + sources

    # memory.append(f'Answer: {llm_answer}')
    memory.save_context({'input': query}, {'output': llm_answer})

    return display_answer

In [11]:
print(f'{process_query(query) = }')

history = '\nHuman: What do you know about Revenue Grid?'
prompt_to_llm = 'You are an AI assistant for the Revenue Grid documentation.\nYou are given a question and extracted parts of product documentation. Provide a conversational answer to the question using the pieces of information provided.\nIf the question includes a request for code, provide a code block directly from the documentation.\nIf you don\'t know the answer, just say "Hmm, I\'m not sure." Don\'t try to make up an answer.\nIf the question is not about Revenue Grid, politely inform them that you are tuned to only answer questions about Revenue Grid.\n\nHuman: What do you know about Revenue Grid?Summaries:\nHave a question?\nYou are at the right place.\nRevenue Grid (RG) is a premium web-based guided selling platform.\nIt\'s Salesforce integration suite that includes efficient and immediate sales playbook automation for highly effective enterprise sales:\n1. Strengthening sales forecast\n2. Improving efficiency of carryin

In [12]:
memory.load_memory_variables({})['history'], llm.get_num_tokens(memory.load_memory_variables({})['history'])

('Human: What do you know about Revenue Grid?\nAI: This is LLM Response', 17)

In [13]:
memory.chat_memory

ChatMessageHistory(messages=[HumanMessage(content='What do you know about Revenue Grid?', additional_kwargs={}, example=False), AIMessage(content='This is LLM Response', additional_kwargs={}, example=False)])