In [1]:
from pinecone_code import *
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from langchain_community.chat_models import ChatCohere
from langchain.chains.conversation.memory import  ConversationBufferWindowMemory, ConversationSummaryBufferMemory, ConversationBufferMemory, ConversationSummaryMemory                             
from langchain.schema import SystemMessage, HumanMessage
from langchain.prompts.chat import HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate
from langchain.callbacks import get_openai_callback
import dotenv
dotenv.load_dotenv()

# Token limit: 16,385
# https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset

# TODO: move these settings to a config file.

# --- Pinecone index -------------------------------------------------------
INDEX_NAME = "resonate-meeting-index"  # alternative: "langchain-retrieval-transcript"
NAMESPACE = "default_namespace"
CLOUD_PROVIDER = "aws"
REGION = "us-west-2"
METRIC = "cosine"
PINECONE_VECTOR_DIMENSION = 3072  # alternative: 1536
PINECONE_UPSERT_BATCH_LIMIT = 90
PINECONE_TOP_K_RESULTS = 3
DELTA = 5

# --- Embeddings -----------------------------------------------------------
EMBEDDING = "OpenAI"
EMBEDDING_MODEL = "text-embedding-3-large"  # alternative: "text-embedding-ada-002"

# --- Local metadata -------------------------------------------------------
master_json_file = "master_meeting_details"

# --- Chat model and conversation memory -----------------------------------
LLM_MODEL = "gpt-3.5-turbo"  # alternative: gpt-3.5-turbo-1106
LLM_TEMPERATURE = 0.0
CONV_BUFFER_MEMORY_WINDOW = 2
LLM_SUMMARY_MAX_TOKEN_LIMIT = 250

In [2]:
class LangChain:
    """Question-answering chatbot over meeting transcripts.

    Wires together a ``PineconeServerless`` helper (used to fetch transcript
    snippets), a ``ChatOpenAI`` model and a ``ConversationChain`` backed by
    ``ConversationBufferWindowMemory``. Each successful query is appended to
    ``self.df`` together with its token usage and cost.
    """

    def __init__(self):
        """Create the Pinecone helper, the chat model, the chain and an empty query log."""
        self.pinecone_obj = PineconeServerless()
        self.llm = ChatOpenAI(temperature=LLM_TEMPERATURE, model_name=LLM_MODEL, streaming=False)
        # Alternative model kept for reference: ChatCohere(model='command', temperature=0)

        # Alternative memories kept for reference:
        #   ConversationSummaryBufferMemory(llm=self.llm, max_token_limit=LLM_SUMMARY_MAX_TOKEN_LIMIT)
        #   ConversationSummaryMemory(llm=self.llm)
        window_memory = ConversationBufferWindowMemory(k=CONV_BUFFER_MEMORY_WINDOW)
        self.conversation_bufw = ConversationChain(llm=self.llm, memory=window_memory)

        # One row per successful query; filled by query_chatbot().
        self.df = pd.DataFrame(columns=['Query', 'LLM Input', 'History',
                                        'LLM Response', 'Tokens Used',
                                        'Prompt Tokens', 'Completion Tokens',
                                        'Total Cost (USD)'])

    def prompt(self, query, context):
        """Build the chat messages (system + human) for one query.

        Args:
            query: The user's question; substituted for ``{input}``.
            context: Transcript text; substituted for ``{context}``.

        Returns:
            The result of ``to_messages()`` on the formatted chat prompt.
        """
        # NOTE(review): the adjacent string literals below are concatenated with no
        # separator, so consecutive sentences run together ("assistant.You are ...")
        # and the two format lines are joined. The text is deliberately left
        # unchanged here because editing it alters the model input and token counts.
        # A shorter variant that also referred to the chat history was tried previously.
        system_template = SystemMessagePromptTemplate.from_template(
            'You are a helpful assistant.'
            'You are provided with a context below. You are expected to answer the user query based on the context below.'
            'The context provided is a part of transcript of a meeting, in the format:'
            'Conversations in meeting: <meeting_title>'
            'Start Time - Speaker: Text \n'

            'You will respond using the context below only. If you cannot find an answer from the below context, you can ask for more information.'
            'You answers should be concise and relevant to the context.'
            'You can mention the meeting_title in your response if you want to refer to the meeting.'
            'You are not allowed to talk about anything else other than the context below.'
            'You cannot use any external information other than the context below.'
            'No need to greet or say goodbye. Just answer the user query based on the context below.'
            'You can also skip mentioning phrases such as : Based on the context provided. Instead simply answer the user query based on the context below.\n\n'
            'Context:\n'
            '{context}'
        )
        human_template = HumanMessagePromptTemplate.from_template(' \nUser Query: {input}')
        chat_prompt = ChatPromptTemplate.from_messages([system_template, human_template])

        chat_prompt_value = chat_prompt.format_prompt(context=context, input=query)
        return chat_prompt_value.to_messages()

    def query_chatbot(self, query, context):
        """Run one query through the conversation chain and log it to ``self.df``.

        Stores the built messages on ``self.messages``, appends one row to
        ``self.df`` and prints the number of tokens used.

        Args:
            query: The user's question.
            context: Transcript text placed in the system prompt.

        Returns:
            The chain output; it is indexed here with 'input', 'history' and 'response'.
        """
        self.messages = self.prompt(query, context)
        resp, callback = self.count_tokens(self.conversation_bufw, self.messages)

        log_row = pd.DataFrame({
            'Query': query,
            'LLM Input': str(resp['input']),
            'History': str(resp['history']),
            'LLM Response': str(resp['response']),
            'Tokens Used': callback['Tokens Used'],
            'Prompt Tokens': callback['Prompt Tokens'],
            'Completion Tokens': callback['Completion Tokens'],
            # The usage report prefixes the cost with '$'; keep only the number.
            'Total Cost (USD)': str(callback['Total Cost (USD)']).replace('$', '')
        }, index=[0])
        self.df = pd.concat([self.df, log_row], ignore_index=True)

        print("Tokens Used: ", callback['Tokens Used'])
        return resp

    def parse_conversations(self, conversations, data_dir: str = '../../bin/data/default_namespace') -> str:
        """Render retrieved conversations as plain text for the prompt context.

        Args:
            conversations: Mapping of cluster id to a DataFrame whose rows provide
                'start_time', 'speaker' and 'text'.
            data_dir: Directory holding one ``<cluster_id>.json`` file per cluster;
                each file must contain a 'meeting_title' key. Defaults to the
                location that was previously hard-coded.

        Returns:
            One section per cluster: a title line, one line per transcript row and
            a blank separator, all joined with newlines. Empty input yields ''.
        """
        data = []
        for cluster_id, cluster_df in conversations.items():
            with open(f'{data_dir}/{cluster_id}.json', encoding='utf-8') as f:
                meeting_title = json.load(f)['meeting_title']
            data.append(f"Conversations in meeting '{meeting_title}':")
            for _, row in cluster_df.iterrows():
                data.append(f"{row['start_time']} - {row['speaker']}: {row['text']}")
            data.append("\n\n")
        return '\n'.join(data)

    def clear_conversational_memory(self):
        """Clear the memory attached to the conversation chain."""
        self.conversation_bufw.memory.clear()

    def chat(self, query, in_filter: "list[str] | None" = None, complete_db_flag: bool = True):
        """Answer ``query`` using transcript context fetched through the Pinecone helper.

        Args:
            query: The user's question.
            in_filter: Passed through to ``query_pinecone``. ``None`` (the default)
                means a fresh empty list per call, so a callee that mutates the
                list cannot leak state into later calls.
            complete_db_flag: Passed through to ``query_pinecone``.

        Returns:
            The chain output from ``query_chatbot`` on success. If ``query_chatbot``
            raises, the conversation memory is cleared and a fixed error string is
            returned instead.
        """
        if in_filter is None:
            in_filter = []
        # TODO: queries containing 'summary' were earmarked for special handling
        # (the previous branch was a no-op).
        self.pinecone_obj.query_pinecone(query, in_filter, complete_db_flag)
        conversation = self.pinecone_obj.query_delta_conversations()
        context = self.parse_conversations(conversation)
        try:
            response = self.query_chatbot(query, context)
        except Exception as e:
            # NOTE(review): every failure is reported as a token-limit error; the
            # actual exception is only printed. Callers compare against this exact
            # string, so it must not change.
            print(f'Error: {e}')
            response = "Oops! you have exhausted the token limit, clearing the conversational memory. Please try again."
            self.clear_conversational_memory()
        return response

    def count_tokens(self, chain, query):
        """Invoke ``chain`` on ``query`` while recording OpenAI token usage.

        Args:
            chain: Callable chain to invoke.
            query: Input handed to the chain.

        Returns:
            ``(response, usage)`` where ``usage`` maps the labels of the callback's
            text report (e.g. 'Tokens Used') to their values as strings.
        """
        with get_openai_callback() as callback:
            response = chain(query)
            print(f'Spent a total of {callback.total_tokens} tokens')
            report = str(callback)
            data = {}
            for line in report.split('\n'):
                parts = line.split(':')
                # Only plain "label: value" lines are kept.
                if len(parts) == 2:
                    data[parts[0].strip()] = str(parts[1].strip())
            return response, data
    

In [None]:
# Build the chatbot wrapper; LangChain.__init__ creates the Pinecone helper, the chat model and an empty query log.
obj = LangChain()

In [None]:
# Manual spot check: a single question with chat()'s default arguments.
obj.chat("How much is the compensation for the job?")

In [None]:
# Manual spot check: minimum age for shadowing.
obj.chat("Whats the minimum age that you need to be in order to do shadowing")

In [None]:
# Manual spot check: speaker-attribution question.
obj.chat("Who was talking about electives?")

In [None]:
# Manual spot check: "why" question about the volunteer programs.
obj.chat("Why were the volunteer programs canceled?")

In [None]:
# Manual spot check: inverse phrasing of the earlier shadowing-age question.
obj.chat("The minimum age to do what is 17?")

In [None]:
# Show the query log; query_chatbot() appends one row per successful call.
obj.df

In [None]:
# Manual spot check: who is sending the emails.
obj.chat("Who will be sending the emails?")

In [None]:
# Manual spot check: the director's school.
obj.chat("From which school is the director from?")

In [None]:
# Manual spot check: monthly session schedule.
obj.chat("When are the sessions scheduled to take place each month?")

In [None]:
# Manual spot check: named-person lookup.
obj.chat("Who is the director of Human Resources at Premier Medical Group?")

In [3]:
# Load the benchmark questions (the loop in the next cell reads the 'questions' column)
# and start from a new LangChain instance, i.e. a new conversation chain and an empty query log.
qna = pd.read_csv('qna.csv')
obj = LangChain()

In [4]:
# Replay up to N_QUESTIONS benchmark questions through the chatbot and stop at
# the first failure reported by chat().
N_QUESTIONS = 50
# head() caps the run at the number of rows actually present, so a CSV with
# fewer than N_QUESTIONS rows no longer raises a KeyError.
questions = qna['questions'].head(N_QUESTIONS)
for i, query in enumerate(questions):
    print(i)
    response = obj.chat(query)
    # chat() returns the chain output on success and a plain error string on
    # failure, so a type check avoids duplicating the exact message here.
    if isinstance(response, str):
        print(response)
        break

0
Fetch window:  ['19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29']
Fetch window:  ['1', '2', '3', '4', '5', '6', '7']
Fetch window:  ['41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51']


  warn_deprecated(


Spent a total of 5160 tokens
Tokens Used:  5160
1
Fetch window:  ['34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44']
Fetch window:  ['46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56']
Fetch window:  ['40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50']
Spent a total of 9311 tokens
Tokens Used:  9311
2
Fetch window:  ['56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66']
Fetch window:  ['53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63']
Fetch window:  ['51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61']
Spent a total of 11985 tokens
Tokens Used:  11985
3
Fetch window:  ['35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45']
Fetch window:  ['34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44']
Fetch window:  ['76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86']
Spent a total of 9058 tokens
Tokens Used:  9058
4
Fetch window:  ['68', '69', '70', '71', '72', '73', '74', '7

In [5]:
# Persist the query log as CSV (without the index column).
# NOTE(review): this file name uses the suffix "tokens" while the Excel and JSON exports use "K";
# CONV_BUFFER_MEMORY_WINDOW is passed as the memory's k, not a token count. Confirm the intended name.
obj.df.to_csv(f'ConversationBufferWindowMemory_{CONV_BUFFER_MEMORY_WINDOW}tokens_.csv', index=False)

In [6]:
# Persist the same query log as an Excel workbook (without the index column).
obj.df.to_excel(f'ConversationBufferWindowMemory_{CONV_BUFFER_MEMORY_WINDOW}K_.xlsx', index=False)

In [7]:
# Persist the same query log as JSON, one object per logged query (orient='records').
obj.df.to_json(f'ConversationBufferWindowMemory_{CONV_BUFFER_MEMORY_WINDOW}K_.json', orient='records')

In [8]:
# Display the query log collected during the benchmark run (inputs, response, token counts, cost).
obj.df

Unnamed: 0,Query,LLM Input,History,LLM Response,Tokens Used,Prompt Tokens,Completion Tokens,Total Cost (USD)
0,How much is the compensation for the job?,"[SystemMessage(content=""You are a helpful assi...",,"The compensation for the job is about $32,000 ...",5160,5137,23,0.0077515
1,Whats the minimum age that you need to be in o...,"[SystemMessage(content=""You are a helpful assi...","Human: [{'content': ""You are a helpful assista...",The minimum age to do shadowing is typically a...,9311,9296,15,0.013974
2,Who was talking about electives?,"[SystemMessage(content=""You are a helpful assi...","Human: [{'content': ""You are a helpful assista...",Miss Ingham was talking about electives.,11985,11975,10,0.0179825
3,The minimum age to do what is 17?,"[SystemMessage(content=""You are a helpful assi...","Human: [{'content': ""You are a helpful assista...",The minimum age to do shadowing is 17 years old.,9058,9045,13,0.0135935
4,Why were the volunteer programs canceled?,"[SystemMessage(content=""You are a helpful assi...","Human: [{'content': ""You are a helpful assista...",The volunteer programs were canceled because o...,6061,6050,11,0.009097
5,Who is the director of Human Resources at Prem...,"[SystemMessage(content=""You are a helpful assi...","Human: [{'content': ""You are a helpful assista...",The director of Human Resources at Premier Med...,7664,7649,15,0.0115035
6,What types of healthcare positions does Premie...,"[SystemMessage(content=""You are a helpful assi...","Human: [{'content': ""You are a helpful assista...",Premier Medical Group offers various types of ...,8559,8523,36,0.0128564999999999
7,What are some reasons someone might want to pu...,"[SystemMessage(content=""You are a helpful assi...","Human: [{'content': ""You are a helpful assista...",Some reasons someone might want to pursue a ca...,12400,12328,72,0.0186359999999999
8,What are some benefits of working in the healt...,"[SystemMessage(content=""You are a helpful assi...","Human: [{'content': ""You are a helpful assista...",Some benefits of working in the healthcare fie...,13267,13206,61,0.019931
9,What are some examples of technical healthcare...,"[SystemMessage(content=""You are a helpful assi...","Human: [{'content': ""You are a helpful assista...",Some examples of technical healthcare jobs inc...,13127,13078,49,0.019715
