# First attempt at writing a memory module for the ChatGPT API

* Simply track a history of messages until capacity. Done
* Fifo Queue - Done
* Vector Storage, memory is created by filling the context only with messages from the storage, 
    * either in Q/A pairs or at message level - Done 
    * either ordered in terms of similarity DONE
    * or chronological -  TODO
    * summarized in a single message or in fewer messages; the {user} role tokens are still wasteful (should check whether only one is used per message) --> 6 extra tokens per message
* Fifo Queue with Outs into VectorStorage, half of the memory is filled with samples from the vector storage / half from retrieval.  TODO
* Compressed Fifo Queue, a compressor thread creates a minified version of the original memory thread creating an information-bottleneck TO REFACTOR
* Compressed Fifo Retrieval, a compressor thread creates a minified version of the original memory, an attention thread selects the compressed messages that are relevant for the answer and uses them as seeds for retrieval TODO
    * potentially create the retrieval prompt with a recaller thread TODO

Examples are still missing, sorry.


In [None]:
# Note: you need to be using OpenAI Python v0.27.0 for the code below to work
import os

import openai

# Never hard-code API keys in source control: the key that used to be written
# on this line must be treated as leaked and revoked. Export OPENAI_API_KEY in
# the environment before running the notebook instead.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
class OpenAiEmbedder:
    """Thin wrapper around the OpenAI ``text-embedding-ada-002`` endpoint."""

    def get_embedding_size(self):
        """Return the length of the vectors this embedder is expected to produce."""
        return 1536

    def embed(self, data, embed_mark=True, verbose=False):
        """Embed a single item and return its embedding.

        Args:
            data: The item to embed. Anything that is not handled by the
                ``embed_mark`` special case is converted with ``str(data)``.
            embed_mark: When ``False`` and ``data`` is a dict with a
                ``"content"`` key, only ``data["content"]`` is embedded;
                otherwise the whole ``str(data)`` representation is embedded.
            verbose: Print what is being sent to the API.

        Returns:
            ``out.data[0].embedding`` from the API response.

        Raises:
            ValueError: If the API call fails; the original error is chained.
        """
        try:
            if embed_mark is False and isinstance(data, dict) and "content" in data:
                if verbose is True:
                    print("Embedding without mark", data["content"])
                out = openai.Embedding.create(input=data["content"], engine='text-embedding-ada-002')
            else:
                if verbose is True:
                    print("Embedding without preprocessing the input", data)
                out = openai.Embedding.create(input=str(data), engine='text-embedding-ada-002')
        except Exception as err:
            # Keep the ValueError contract but preserve the real cause.
            raise ValueError("The data  is not valid") from err
        return out.data[0].embedding

    def embed_list(self, data):
        """Embed several inputs with one batched API call.

        Args:
            data: A list of inputs accepted by the embeddings endpoint.

        Returns:
            A list with one embedding per response entry, in response order.
            An empty input list yields an empty list without calling the API.

        Raises:
            ValueError: If ``data`` is not a list.
        """
        if not isinstance(data, list):
            raise ValueError("The data  is not valid")
        if not data:
            return []
        out = openai.Embedding.create(input=data, engine='text-embedding-ada-002')
        return [item.embedding for item in out.data]

In [None]:
def mark_system(system_prompt):
    """Wrap *system_prompt* as a chat message carrying the ``system`` role."""
    return dict(role="system", content=system_prompt)
def mark_answer(answer):
    """Wrap *answer* as a chat message carrying the ``assistant`` role."""
    return dict(role="assistant", content=answer)
def mark_question(question):
    """Wrap *question* as a chat message carrying the ``user`` role."""
    return dict(role="user", content=question)
def check_dict(message_dict):
    """Return the message dict, unwrapping a one-element ``[dict]`` list.

    Only exact ``dict`` / ``list`` types are accepted (subclasses are not).

    Raises:
        Exception: If the argument is neither a dict nor a list holding
            exactly one dict.
    """
    if type(message_dict) is dict:
        return message_dict

    wrapped_single = (
        type(message_dict) is list
        and len(message_dict) == 1
        and type(message_dict[0]) is dict
    )
    if wrapped_single:
        return message_dict[0]

    raise Exception("The message_dict should be a dictionary or a [dictionary] instead it is ", message_dict, type(message_dict))

In [None]:
import faiss
import numpy as np
import pickle

class MemoryIndex:
    """Wrapper pairing a faiss index with the list of values it indexes.

    ``self.values`` and ``self.index`` are always appended to together, so
    the value at position ``i`` corresponds to row ``i`` of the index.
    """

    def __init__(self, index=None, values=None, embeddings=None, name='memory_index', save_path=None, load=False):
        """Create the index, or load it from ``<save_path>/<name>.pkl``.

        Args:
            index: Optional existing faiss index (requires ``values``).
            values: Optional list of values to index.
            embeddings: Optional embeddings, one per entry of ``values``.
            name: Name of the index; also the stem of the pickle file.
            save_path: Directory used by ``save``/``load`` when no explicit
                path is given.
            load: When ``True`` the other arguments are ignored and the
                index is read from disk.
        """
        self.name = name
        self.embedder = OpenAiEmbedder()
        self.save_path = save_path
        if load is True:
            self.load()
        else:
            self.init_index(index, values, embeddings)

    def init_index(self, index, values, embeddings):
        """Initialise ``self.index`` and ``self.values`` from the arguments.

        Raises:
            ValueError: If the combination of arguments matches none of the
                four supported cases.
        """
        # Case 1: empty index created from scratch.
        if index is None and values is None and embeddings is None:
            print("Creating a new index")
            self.index = faiss.IndexFlatIP(self.embedder.get_embedding_size())
            self.values = []
        # Case 2: values with pre-computed embeddings.
        elif index is None and values is not None and embeddings is not None and len(values) == len(embeddings):
            print("Creating a new index from a list of embeddings and values")
            self.index = faiss.IndexFlatIP(self.embedder.get_embedding_size())
            # Must exist before add_to_index_embedding() checks membership.
            self.values = []
            for embedding, value in zip(embeddings, values):
                self.add_to_index_embedding(value, embedding)
        # Case 3: an existing faiss index plus its matching values list.
        elif isinstance(index, faiss.Index) and index.d == self.embedder.get_embedding_size() and isinstance(values, list) and len(values) == index.ntotal:
            print("Creating a new index from a faiss index and values list")
            self.index = index
            self.values = values
        # Case 4: values only; each one is embedded through the embedder.
        elif index is None and values is not None and embeddings is None:
            print("Creating a new index from a list of values")
            self.index = faiss.IndexFlatIP(self.embedder.get_embedding_size())
            # Must exist before add_to_index() checks membership.
            self.values = []
            for value in values:
                self.add_to_index(value)
        else:
            raise ValueError("The index is not a valid faiss index or the embedding dimension is not correct")

    def add_to_index(self, value, verbose=False):
        """Embed ``value`` and add it to the index unless it is already stored.

        Raises:
            ValueError: If the embedder fails; the original error is chained.
        """
        if value in self.values:
            if verbose:
                display(Markdown("The value {value} was already in the index".format(value=value)))
            return

        try:
            embedding = self.embedder.embed(value)
        except Exception as err:
            raise ValueError("The message cant be embedded", value) from err
        if verbose:
            display(Markdown("The value {value} was embedded".format(value=value)))

        self.index.add(np.array([embedding]).astype(np.float32))
        self.values.append(value)

    def add_to_index_embedding(self, value, embedding, verbose=False):
        """Add ``value`` with a pre-computed ``embedding`` unless already stored.

        Args:
            value: The value to store.
            embedding: A list/tuple of floats, a numpy array, or the string
                representation of a list of floats (e.g. read from a CSV).
            verbose: Report duplicates.

        Raises:
            ValueError: If the embedding has an unsupported type, cannot be
                parsed, or does not match the index dimension.
        """
        if isinstance(embedding, str):
            # SECURITY: this used to call eval() on the string, which executes
            # arbitrary code from e.g. a CSV cell. literal_eval only accepts
            # Python literals such as "[0.1, 0.2]".
            import ast
            try:
                embedding = ast.literal_eval(embedding)
            except (ValueError, SyntaxError) as err:
                raise ValueError("The embedding is not a valid type") from err
        if not isinstance(embedding, (list, tuple, np.ndarray)):
            raise ValueError("The embedding is not a valid type")

        # Normalise every accepted input to a float32 matrix of shape (1, d).
        vector = np.asarray(embedding, dtype=np.float32)
        if vector.ndim == 1:
            vector = vector.reshape(1, -1)

        if value in self.values:
            if verbose:
                display(Markdown("The value {value} was already in the index".format(value=value)))
            return

        if vector.shape[1] != self.index.d:
            raise ValueError("The embedding has dimension {found} but the index expects {expected}".format(found=vector.shape[1], expected=self.index.d))
        self.index.add(vector)
        self.values.append(value)

    def faiss_query(self, key, k=10):
        """Return up to ``k`` stored values most similar to ``key``.

        ``k`` is capped at the number of stored values; an empty index (or a
        non-positive ``k``) returns ``[]`` without calling the embedder.
        """
        k = min(k, len(self.values))
        if k <= 0:
            return []
        embedding = self.embedder.embed(key)
        _, neighbours = self.index.search(np.array([embedding]).astype(np.float32), k)
        # Negative labels would silently index from the end of the list.
        return [self.values[i] for i in neighbours[0] if i >= 0]

    def _default_path(self):
        """Return ``<save_path>/<name>.pkl``, or ``<name>.pkl`` without a save_path."""
        filename = self.name + ".pkl"
        if self.save_path is None:
            return filename
        if self.save_path.endswith("/"):
            return self.save_path + filename
        return self.save_path + "/" + filename

    def save(self, path=None):
        """Pickle the index and values to ``path`` (default: see ``_default_path``)."""
        if path is None:
            path = self._default_path()
        print("Saving the index to ", path)
        with open(path, 'wb') as f:
            pickle.dump({'index': self.index, 'values': self.values}, f)

    def load(self, path=None):
        """Load the index and values pickled by ``save``.

        SECURITY: unpickling executes code embedded in the file; only load
        files that this application wrote itself.
        """
        if path is None:
            path = self._default_path()
        with open(path, 'rb') as f:
            data = pickle.load(f)
        self.index = data['index']
        self.values = data['values']

In [None]:
import pandas as pd
import copy
import os

class PandaIndex(MemoryIndex):
    """MemoryIndex whose values come from a pandas DataFrame or a CSV file."""

    def __init__(self, pandaframe, columns=None, name='panda_index', save_path=None, in_place=True, embeddings_col=None):
        """Build the index from ``pandaframe``.

        Args:
            pandaframe: A DataFrame, or the path of an existing ``.csv`` file.
            columns: Column name, or list of column names, holding the values.
                Several columns are joined with a space into a new
                ``"values"`` column. Required for a DataFrame; defaults to
                ``"values"`` for a CSV file.
            name: Index name. For a CSV file, the default name is replaced by
                the file stem.
            save_path: Forwarded to ``MemoryIndex``.
            in_place: When ``False`` a DataFrame argument is copied first, so
                the helper ``"values"`` column is not added to the caller's
                frame.
            embeddings_col: Optional column holding pre-computed embeddings.

        Raises:
            ValueError: If the CSV cannot be read, or the arguments are not a
                supported combination.
        """
        self.columns = columns
        self.values = []

        if isinstance(pandaframe, str) and pandaframe.endswith(".csv") and os.path.isfile(pandaframe):
            # Keep the path: the name below is derived from it after the
            # variable has been rebound to the loaded DataFrame.
            csv_path = pandaframe
            try:
                pandaframe = pd.read_csv(csv_path)
            except Exception as err:
                raise ValueError("The CSV file is not valid") from err
            if name == 'panda_index':
                name = csv_path.split("/")[-1].split(".")[0]
            if self.columns is None:
                self.columns = "values"
        elif isinstance(pandaframe, pd.DataFrame) and columns is not None:
            if not in_place:
                pandaframe = pandaframe.copy(deep=True)
        else:
            raise ValueError("The pandaframe is not a valid pandas dataframe or the columns are not valid or the path is not valid")

        values, embeddings = self.extract_values_and_embeddings(pandaframe, embeddings_col)
        super().__init__(values=values, embeddings=embeddings, name=name, save_path=save_path)

    def extract_values_and_embeddings(self, pandaframe, embeddings_col):
        """Return ``(values, embeddings)`` read from ``pandaframe``.

        ``embeddings`` is ``None`` when ``embeddings_col`` is ``None``.
        A list in ``self.columns`` is collapsed into a ``"values"`` column
        added to ``pandaframe``, and ``self.columns`` is set to ``"values"``.

        Raises:
            ValueError: If ``self.columns`` is neither a string nor a
                non-empty list, or names a column that does not exist.
        """
        if isinstance(self.columns, list) and len(self.columns) > 1:
            # str() lets non-string cells (numbers, NaN) take part in the join.
            pandaframe["values"] = pandaframe[self.columns].apply(lambda row: ' '.join(map(str, row)), axis=1)
            self.columns = "values"
        elif isinstance(self.columns, list) and len(self.columns) == 1:
            pandaframe["values"] = pandaframe[self.columns[0]]
            self.columns = "values"
        elif not isinstance(self.columns, str):
            raise ValueError("The columns are not valid")

        if self.columns not in pandaframe.columns:
            raise ValueError("The columns are not valid")

        values = pandaframe[self.columns].tolist()
        embeddings = None
        if embeddings_col is not None:
            embeddings = pandaframe[embeddings_col].tolist()
        return values, embeddings


In [None]:
import tiktoken
from IPython.display import display, Markdown

class MemoryThread:
    """Ordered list of chat messages with a running token count.

    This is the base class for the other memories. When ``max_memory`` is
    ``None`` there is no limit on the number of tokens that can be stored.
    """

    def __init__(self, name='memory', max_memory=None):
        self.name = name
        self.max_memory = max_memory
        self.memory_thread = []
        self.total_tokens = 0
        self.tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')

    def __getitem__(self, idx):
        return self.memory_thread[idx]

    def get_message_tokens(self, message_dict):
        """Return the token count of the message content plus 6 for the role."""
        message = message_dict["content"]
        return len(self.tokenizer.encode(message)) + 6  # +6 for the role token

    def remove_message(self, message_dict=None, idx=None):
        """Remove one message and subtract its tokens from the total.

        When ``idx`` is given the message at that position is removed;
        otherwise the latest message equal to ``message_dict`` is removed.

        Returns:
            ``True`` once the message has been removed.

        Raises:
            Exception: If neither argument is given, the message is not
                found, or ``idx`` is out of bounds.
        """
        if message_dict is None and idx is None:
            raise Exception("You need to provide either a message_dict or an idx")

        if idx is None:
            message_dict = check_dict(message_dict)
            search_results = self.find_message(message_dict, last=True)
            if search_results is None:
                raise Exception("The message was not found in the memory thread")
            idx = search_results[-1]["idx"]
        elif not -len(self.memory_thread) <= idx < len(self.memory_thread):
            raise Exception("The index was out bound")

        message = self.memory_thread.pop(idx)
        self.total_tokens -= self.get_message_tokens(message)
        return True

    def add_message(self, message_dict: dict):
        """Append ``{"role": ..., "content": ...}`` if it fits in ``max_memory``.

        Returns:
            ``True`` if the message was stored, ``False`` if the thread is
            full (a notice is displayed and nothing is stored).
        """
        message_tokens = self.get_message_tokens(message_dict)

        if self.max_memory is None or self.total_tokens + message_tokens <= self.max_memory:
            self.memory_thread.append(message_dict)
            self.total_tokens += message_tokens
            return True

        display(Markdown("The memory thread is full, the last message was not added"))
        return False

    def find_message(self, message_dict: dict, last=False):
        """Find every stored message with the same role and content.

        Returns:
            A list of ``{"message": message, "idx": idx}`` entries in thread
            order (only the last one when ``last`` is true), or ``None``
            when nothing matches.
        """
        message_dict = check_dict(message_dict)
        search_results = [
            {"message": message, "idx": idx}
            for idx, message in enumerate(self.memory_thread)
            if message["role"] == message_dict["role"] and message["content"] == message_dict["content"]
        ]
        if not search_results:
            return None
        return [search_results[-1]] if last else search_results

    def length(self):
        """Return the number of stored messages."""
        return len(self.memory_thread)

    def slice_tokens(self, start_idx=0, end_idx=None):
        """Return the token count of ``memory_thread[start_idx:end_idx]``.

        By default the whole thread is counted.

        Raises:
            ValueError: If the bounds are not valid slice indices or a
                message in the range has no ``"content"``.
        """
        if end_idx is None:
            end_idx = len(self.memory_thread)
        try:
            return sum(self.get_message_tokens(message) for message in self.memory_thread[start_idx:end_idx])
        except (TypeError, KeyError) as err:
            raise ValueError("The slice is not valid") from err

    def get_message(self, idx: int):
        return self.memory_thread[idx]

    def get_thread(self):
        return self.memory_thread

    def slice(self, start, end):
        """Return ``memory_thread[start:end]``.

        Raises:
            ValueError: If the bounds are not valid slice indices.
        """
        try:
            return self.memory_thread[start:end]
        except TypeError as err:
            raise ValueError("The slice is not valid") from err

    def print(self):
        """Display a detailed Markdown rendering of the thread."""
        display(Markdown("## Memory Thread"))
        display(Markdown("#### Total Tokens: " + str(self.total_tokens)))
        display(Markdown("#### Max Tokens: " + str(self.max_memory)))
        display(Markdown("#### Number of Messages: " + str(len(self.memory_thread))))
        display(Markdown("#### Messages:"))
        for message in self.memory_thread:
            display(Markdown("#### " + message["role"] + ": " + message["content"]))

In [None]:
import copy 

class FifoMemory(MemoryThread):
    """FIFO memory thread bounded by ``max_memory`` tokens.

    When a new message does not fit, the oldest messages are moved to
    ``longterm_thread`` until it does. ``lucid_thread`` is a redundant,
    unbounded thread that records every message passed to ``add_message``.
    """

    def __init__(self, name='fifo_memory', max_memory=None, longterm_thread=None):
        super().__init__(name=name, max_memory=max_memory)
        self.lucid_thread = MemoryThread(name='lucid_memory', max_memory=None)
        if longterm_thread is None:
            self.longterm_thread = MemoryThread(name='longterm_memory', max_memory=None)
        else:
            self.longterm_thread = longterm_thread
        # Alias of memory_thread (same list object) to make the code more readable.
        self.fifo_thread = self.memory_thread

    def to_longterm(self, idx):
        """Move the message at ``idx`` from the FIFO thread to the long-term thread.

        Raises:
            Exception: If the long-term thread refuses the message.
        """
        display(Markdown("The memory thread is full, the oldest message with index {} was moved to the longterm memory".format(idx)))
        # Deep copy so the long-term thread never shares a dict with this one.
        message = copy.deepcopy(self.memory_thread[idx])
        status = self.longterm_thread.add_message(message)
        if not status:
            raise Exception("The longterm memory is bugged")
        self.remove_message(idx=idx)

    def add_message(self, message_dict: dict):
        """Store a message, evicting the oldest ones to long-term memory if needed.

        Args:
            message_dict: ``{"role": role, "content": content}``, optionally
                wrapped in a one-element list.

        Returns:
            ``True`` if the message was stored in the FIFO thread, ``False``
            if it does not fit even after the thread has been emptied.
        """
        # Unwrap first so the lucid thread receives the same dict as the FIFO thread.
        message_dict = check_dict(message_dict)
        self.lucid_thread.add_message(message_dict)
        message_tokens = self.get_message_tokens(message_dict)

        # max_memory None means unbounded: nothing is ever evicted.
        if self.max_memory is not None:
            while self.memory_thread and self.total_tokens + message_tokens > self.max_memory:
                self.to_longterm(idx=0)
        return super().add_message(message_dict)

In [None]:
class VectorMemory(MemoryThread, MemoryIndex):
    """Memory thread backed by a faiss index of its messages.

    Every stored message is also indexed, so a prompt can be composed from
    the messages most similar to a query (chronological order is a TODO).
    """

    def __init__(self, index=None, name='vector_memory', max_context=2048):
        super().__init__(name=name, max_memory=None)
        MemoryIndex.__init__(self, index=index, name=name)
        self.max_context = max_context

    def index_message(self, message_dict: dict, verbose=True):
        """Embed the message and add it to the faiss index."""
        self.add_to_index(value=check_dict(message_dict), verbose=verbose)

    def add_message(self, message_dict: dict):
        """Append the message to the thread and index it; always returns ``True``."""
        print("checking the dict")
        checked = check_dict(message_dict)
        print("trying to add the message")
        super().add_message(checked)
        self.index_message(checked)
        return True

    def get_token_bound_prompt(self, query, k=10):
        """Return the messages to use as context for ``query``.

        While the whole thread fits in ``max_context`` tokens a copy of it is
        returned. Otherwise all messages are ranked by similarity to the
        query and taken greedily, skipping any message that would exceed the
        token budget. ``k`` is accepted but not used.
        """
        if not len(self.memory_thread) > 0:
            return []
        if not self.total_tokens > self.max_context:
            return list(self.memory_thread)

        ranked = self.faiss_query(mark_question(query), k=len(self.memory_thread))
        selected = []
        used_tokens = 0
        for candidate in ranked:
            cost = self.get_message_tokens(candidate)
            if used_tokens + cost > self.max_context:
                continue
            selected.append(candidate)
            used_tokens += cost
        # NOTE(review): the result is in retrieval order (most similar first
        # if the query returns it that way); the original reversed the list
        # twice, which has the same net effect. Confirm the intended order.
        return selected

In [None]:
import gradio as gr
class Chat:
    """Base class for chatbots.

    It holds the calls to the chat-gpt API and a basic gradio interface.
    Sub-classes connect it to a memory thread by replacing ``prompt_func``
    and/or overriding ``query``.
    """

    def __init__(self, system_prompt: str = None, user_prompt: str = None, max_output_tokens=1000):
        """Configure the model and the prompts.

        Args:
            system_prompt: System message; a default is used when ``None``.
            user_prompt: Template for the user message, formatted with
                ``question=...``; defaults to ``"{question}"``.
            max_output_tokens: ``max_tokens`` sent with every completion.
        """
        self.model = "gpt-3.5-turbo"
        self.tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')
        self.max_output_tokens = max_output_tokens
        # Explicit prompts must be stored too, not only the defaults.
        if system_prompt is None:
            self.system_prompt = self.get_default_system_prompt()
        else:
            self.system_prompt = system_prompt
        if user_prompt is None:
            self.user_prompt = self.get_default_user_prompt()
        else:
            self.user_prompt = user_prompt
        self.failed_responses = []
        self.prompt_func = self.one_shot_prompt
        self.answers = []

    def get_mark_from_response(self, response):
        """Return the first choice of ``response`` as ``{"role", "content"}``."""
        message = response['choices'][0]["message"]
        return {"role": message["role"], "content": message["content"]}

    def get_str_from_response(self, response):
        """Return the text content of the first choice of ``response``."""
        return response['choices'][0]["message"]["content"]

    def get_default_system_prompt(self):
        one_shot_prompt = "You are a useful Assistant you role is to answer questions in an exhaustive way! Please be helpful to the user he loves you!"
        return one_shot_prompt

    def get_default_user_prompt(self):
        empty_user_prompt = "{question}"
        return empty_user_prompt

    def one_shot_prompt(self, message):
        """Return ``(prompt, marked_question)`` with no conversation history."""
        marked_question = mark_question(self.user_prompt.format(question=message))
        prompt = [mark_system(self.system_prompt), marked_question]
        return prompt, mark_question(self.user_prompt.format(question=message))

    def chat_response(self, prompt):
        """Call the chat completion API.

        Args:
            prompt: A list of message dicts, or a plain string that is first
                turned into a prompt with ``prompt_func``.

        Returns:
            ``(response, True)`` on success. On an OpenAI error a canned
            response is recorded in ``failed_responses`` and returned as
            ``(fail_response, False)``.
        """
        if isinstance(prompt, str):
            prompt, _ = self.prompt_func(prompt)
        try:
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=prompt,
                max_tokens=self.max_output_tokens,
            )
        except openai.error.OpenAIError as e:
            print(e)
            # Same shape as a real response, role included, so the
            # get_*_from_response helpers work on it as well.
            fail_response = {"choices": [{"message": {"role": "assistant", "content": "I am sorry, I am having trouble understanding you. There might be an alien invasion interfering with my communicaiton with OpenAI."}}]}
            self.failed_responses.append(fail_response)
            return fail_response, False
        return response, True

    def query(self, message):
        """Ask one question and return the answer as a message dict.

        Overridden by sub-classes to add memory to the chatbot. On failure
        the canned failure message is returned and not added to ``answers``.
        """
        prompt, _ = self.prompt_func(message)
        response, success = self.chat_response(prompt)
        display(Markdown("#### Question: \n {question}".format(question=message)))
        answer = self.get_mark_from_response(response)
        if success:
            self.answers.append(answer)
            display(Markdown(" #### Anwser: \n {answer}".format(answer=answer["content"])))
        return answer

    def reply(self, question):
        """Wrapper around ``query`` that returns only the answer text."""
        return self.query(question)["content"]

    def run_text(self, text, state):
        """Gradio callback: answer ``text`` and append the exchange to ``state``."""
        print("===============Running run_text =============")
        print("Inputs:", text)
        # Only memory-backed sub-classes have a memory_thread attribute.
        memory = getattr(self, "memory_thread", None)
        if memory is None:
            print("======>No memory")
        else:
            print("======>Current memory:\n %s" % (memory,))
        answer = self.query(text)
        response = answer["content"]
        state = state + [(text, response)]
        print("Outputs:", state)
        return state, state

    def gradio(self):
        """Build and launch the gradio chat interface on localhost:7860."""
        with gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}") as demo:
            chatbot = gr.Chatbot(elem_id="chatbot", label="NeuralDragonAI Alpha-V0.1")
            state = gr.State([])
            with gr.Row():
                with gr.Column(scale=1):
                    txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter, or upload an image").style(container=False)
                with gr.Column(scale=0.15, min_width=0):
                    clear = gr.Button("Clear️")

            txt.submit(self.run_text, [txt, state], [chatbot, state])
            txt.submit(lambda: "", None, txt)
            # The button only resets what the UI shows, not the bot's memory.
            clear.click(lambda: [], None, chatbot)
            clear.click(lambda: [], None, state)
            demo.launch(server_name="localhost", server_port=7860)

In [None]:
class ToolChat(Chat):
    """Chatbot whose prompt describes a set of tools to the model.

    The system message lists the tools and asks the model to answer in a
    Question / Thought / Action / Observation format, naming the tool and its
    input in a JSON blob. Every successful answer is appended to a scratchpad
    that is sent back with the next question.

    Nothing in this class parses the model's action or calls a tool.
    The prompting and agent framework are inspired by LangChain/agent.py.
    """
    def __init__(self,tools:dict = None, prompt_type:str = "langchain"):
        """Set up the tool list and the prompt style.

        Args:
            tools: The tool descriptions. NOTE(review): annotated as ``dict``,
                but the default and ``get_system_message`` treat it as a list
                of dicts with at least ``"name"`` and ``"description"`` keys.
            prompt_type: ``"langchain"``, ``"langchain_onestep"`` or ``"goap"``.

        Raises:
            ValueError: If ``prompt_type`` is not one of the values above.
        """
        # The base class falls back to its default system and user prompts.
        super().__init__(None, None)
        # Default to a single empty placeholder tool.
        if tools is None:
            self.tools = [{"name": "", "description": "",}]
        else:
            self.tools = tools
        self.prompt_type = prompt_type
        self.prompt_func = self.tool_prompt
        self.format_instructions = self.get_format_instructions()
        # Accumulates the text of every successful answer (see query).
        self.scratchpad = ""
        

    def get_langchain_instructions(self):
        """Return the instructions for the repeatable Thought/Action/Observation format.

        The doubled braces are escapes for the ``str.format`` call in
        ``get_system_message``.
        """
        format_instructions = """The way you use the tools is by specifying a json blob.
            Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).

            The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:

            ```
            {{
            "action": "calculator",
            "action_input": "1 + 2"
            }}
            ```

            ALWAYS use the following format:

            Question: the input question you must answer
            Thought: you should always think about what to do
            Action: 
            ```
            $JSON_BLOB
            ```
            Observation: the result of the action
            ... (this Thought/Action/Observation can repeat N times)
            Thought: I now know the final answer
            Final Answer: the final answer to the original input question"""
        
        return format_instructions
    def get_langchain_onestep_instructions(self):
        """Return the instructions for the one-action-per-response format.

        This variant also defines a "Partial Answer" output.
        """
        format_instructions = """The way you use the tools is by specifying a json blob.
            Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).

            The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:

            ```
            {{
            "action": "calculator",
            "action_input": "1 + 2"
            }}
            ```

            ALWAYS use the following format:

            Question: the input question you must answer
            Thought: you should always think about what to do
            Action: 
            ```
            $JSON_BLOB
            ```
            Observation: the result of the action
            ... (this Thought/Action/Observation can occur only once per response NEVER repeat, you can only use the tool once per response)
            
            
            IFF you reach the final answer, use the following format:
            Thought: I now know the final answer\n
            Final Answer: the final answer to the original input question
            ELIF you only have a partial answer use the following format:
            Thought: I now know the partial answer\n
            Partial Answer: the partial answer to the original input question
            REMEMBER TO NEVER OUTPUT TWO ACTIONS IN A SINGLE RESPONSE"""
        
        return format_instructions
    
    def get_goap_format_instructions(self):
        """Return the more elaborate instructions imitating Goal Oriented Action Planning.

        The model is asked to track state variables, a goal, action
        conditions and a predicted observation.
        """
        format_instructions = """The way you use the tools is by specifying a json blob.
            Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).

            The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:

            ```
            {{
            "action": "calculator",
            "action_input": "1 + 2"
            }}
            ```

            ALWAYS use the following format:

            Question: the input question you must answer\n
            
            Internal State Update: you should always update your understanding of the world based on the previous observations and the question\n
            Current Observation: your understanding of the current state of the world expressed as a list of tuples containing a state variable name and a boolean indicating whether is currently active or not, this are different from your tools\n
            Goal: what states of the world must be true for the question to be answered\n
            Action: 
            ```
            $JSON_BLOB
            ```
            Conditions for taking the actions: list of tuples of state variables that needs to be TRUE or FALSe for the action to be taken\n
            IsConditionMet: boolean specifying whether the conditions are met by current_observation only ever complete with True/False\n
            Predicted Observation: here you have to write what do you expect the results of the action to be, if ISConditionMet is False then the predicted observation must be the same as the current observation
            (this Thought/Action/Predicted Observation can repeat N times until you get the final answer or you reach the maximum number of steps specified by the user)
            IFF you reach the final answer, use the following format:
            Thought: I now know the final answer\n
            Final Answer: the final answer to the original input question
            ELIF you only have a partial answer use the following format:
            Thought: I now know the partial answer\n
            Partial Answer: the partial answer to the original input question"""
        return format_instructions
    
    def get_format_instructions(self):
        """Return the format instructions selected by ``self.prompt_type``.

        Raises:
            ValueError: If ``prompt_type`` is not ``"langchain"``,
                ``"langchain_onestep"`` or ``"goap"``.
        """
        prompt_type = self.prompt_type
        if prompt_type == "langchain":
            format_instructions = self.get_langchain_instructions()
        elif prompt_type == "langchain_onestep":
            format_instructions = self.get_langchain_onestep_instructions()
        elif prompt_type == "goap":
            format_instructions = self.get_goap_format_instructions()
        else:
            raise ValueError("prompt_type not valid")

        
        return format_instructions
    def get_system_message(self):
        """Build the system message from the tool list and the format instructions.

        The parts are joined with blank lines in this order: prefix, one
        ``name: description`` line per tool, format instructions, suffix.
        """
        prefix = """Answer the following questions as best you can. You have access to the following tools:"""
        suffix = """Begin! Reminder to always use the exact characters `Final Answer` when responding."""
        tool_strings = "\n".join([f"{tool['name']}: {tool['description']}" for tool in self.tools])
        tool_names = ", ".join([tool['name'] for tool in self.tools])
        format_instructions = self.get_format_instructions()
        # None of the templates has a {tool_names} placeholder, so this call
        # only turns the escaped {{ }} into literal braces.
        format_instructions = format_instructions.format(tool_names=tool_names)
        system_template = "\n\n".join([prefix, tool_strings, format_instructions, suffix])
        return system_template

    def get_scratchpad_prompt(self):
        """Return the text that presents the scratchpad as the model's previous work."""
        # The adjacent f-strings form one string; the list has one element.
        scratchpad_prompt = [
                f"This was your previous work "
                f"(but I haven't seen any of it! I only see what "
                f"you return as final answer):\n{self.scratchpad}"]
        return scratchpad_prompt[0]
        
    def tool_prompt(self, message):
        """Return ``(prompt, marked_question)`` for ``message``.

        The user message is ``message`` followed by the scratchpad text; the
        scratchpad text is appended even when the scratchpad is empty.
        """
        system_template = self.get_system_message()
        scratchpad_prompt = self.get_scratchpad_prompt()
        joint_message_and_scratchpad = "\n\n".join([message, scratchpad_prompt])
        
        prompt = [mark_system(system_template)]+ [mark_question(joint_message_and_scratchpad)]
        return prompt, mark_question(joint_message_and_scratchpad)
    
    def query(self, message):
        """Ask one question and record the answer in the scratchpad.

        Returns:
            The answer as a ``{"role", "content"}`` dict on success. On
            failure nothing is recorded and ``None`` is returned implicitly.
        """
        prompt, _ = self.prompt_func(message)
        response, success = self.chat_response(prompt)
        display(Markdown("#### Question: \n {question}".format(question = message)))
        if success:
            # The raw answer text is what the next prompt shows as previous work.
            self.scratchpad += "\n" + self.get_str_from_response(response) + "\n"
            self.answers.append(self.get_mark_from_response(response))
            display(Markdown(" #### Anwser: \n {answer}".format(answer = self.get_str_from_response(response))))  
            return self.answers[-1]

In [None]:
class FifoChat(FifoMemory, Chat):
    """Chat backed by a FIFO memory thread.

    Per the original author's description: the oldest messages are removed
    first when the ``max_memory`` limit (expressed in tokens) is reached,
    evicted messages are passed to the long-term memory, and the lucid memory
    is a redundant store that keeps every message.
    """

    def __init__(self, system_prompt=None, name='fifo_memory', max_memory=2048, longterm_thread=None):
        """Initialise the memory side first, then the chat side.

        Args:
            system_prompt: system prompt to use; when ``None`` the value of
                ``get_default_system_prompt()`` is used instead.
            name: forwarded to the first base initialiser.
            max_memory: forwarded to the first base initialiser.
            longterm_thread: forwarded to the first base initialiser.
        """
        super().__init__(name, max_memory, longterm_thread)
        self.system_prompt = (
            self.get_default_system_prompt() if system_prompt is None else system_prompt
        )
        Chat.__init__(self, self.system_prompt)
        # query() builds its prompt through this hook.
        self.prompt_func = self.fifo_memory_prompt

    def fifo_memory_prompt(self, message):
        """Build the prompt: system prompt, whole memory thread, then the question.

        Args:
            message: the user's question text, inserted into ``self.user_prompt``.

        Returns:
            tuple: ``(prompt, marked_question)``; ``marked_question`` is a
            separately marked copy of the formatted question.
        """
        head = [mark_system(self.system_prompt)]
        tail = [mark_question(self.user_prompt.format(question=message))]
        full_prompt = head + self.memory_thread + tail

        return full_prompt, mark_question(self.user_prompt.format(question=message))

    def query(self, question):
        """Send ``question`` to the chat API and store the exchange in memory.

        Args:
            question: the user's question text.

        Returns:
            The marked answer on success; otherwise the content of the first
            choice of the returned response.
        """
        prompt, marked_question = self.prompt_func(question)
        response, success = self.chat_response(prompt)

        if not success:
            return response.choices[0].message.content

        # Store question and answer, in that order, in the chat history.
        answer = self.get_mark_from_response(response)
        self.add_message(marked_question)
        self.add_message(answer)
        return answer

In [None]:
class VectorChat(VectorMemory, Chat):
    """Chat whose context is retrieved from a vector memory.

    Per the original author's description: the memory part of the prompt is
    built from the k messages most similar to the question, added until the
    maximum number of prompt tokens is reached.
    """

    def __init__(self, index=None, name='vector_memory', max_context = 2048, system_prompt = None, user_prompt = None):
        """Initialise the memory side first, then the chat side.

        Args:
            index: forwarded to the first base initialiser.
            name: forwarded to the first base initialiser.
            max_context: forwarded to the first base initialiser.
            system_prompt: system prompt to use; when ``None`` the value of
                ``get_default_system_prompt()`` is used instead.
            user_prompt: forwarded to ``Chat.__init__``.
        """
        super().__init__(index, name, max_context)
        # An explicit system_prompt has to be stored as well: the call to
        # Chat.__init__ below reads self.system_prompt in both cases.
        if system_prompt is None:
            self.system_prompt = self.get_default_system_prompt()
        else:
            self.system_prompt = system_prompt
        Chat.__init__(self, self.system_prompt, user_prompt)
        # query() builds its prompt through this hook.
        self.prompt_func = self.vector_memory_prompt

    def vector_memory_prompt(self, question, k = 10):
        """Build the prompt: system prompt, retrieved messages, then the question.

        The retrieved part is whatever ``get_token_bound_prompt(question, k=k)``
        returns; the marked question is the last message of the prompt.

        Args:
            question: the user's question text.
            k: number of candidates requested from ``get_token_bound_prompt``.

        Returns:
            tuple: ``(prompt, marked_question)``; ``marked_question`` is a
            separately marked copy of the formatted question.
        """
        prompt = [mark_system(self.system_prompt)]
        prompt += self.get_token_bound_prompt(question, k = k)
        prompt += [mark_question(self.user_prompt.format(question=question))]

        return prompt, mark_question(self.user_prompt.format(question=question))

    def query(self, question,verbose = False):
        """Send ``question`` to the chat API and store the exchange in memory.

        Args:
            question: the user's question text.
            verbose: when truthy, print the prompt before sending it.

        Returns:
            The marked answer on success; otherwise the content of the first
            choice of the returned response.
        """
        # compose the prompt for the chat-gpt api
        prompt, marked_question = self.prompt_func(question)

        if verbose:
            print("prompt: ", prompt)
        # call the chat-gpt api
        response, success = self.chat_response(prompt)
        if success:
            # add the question and the answer to the chat history
            answer = self.get_mark_from_response(response)
            self.add_message(marked_question)
            self.add_message(answer)
            return answer
        else:
            return response.choices[0].message.content

```
       FIFOVCHAT
+---------------------------+
|        Input (Text)       |
+---------------------------+
           |
           v
+---------------------------+   embed   +---------------------------+
|    Short-term Memory (STM)|---------->|    Query Embedder         |
+---------------------------+           +---------------------------+
           |                                          |
           |                                          v
           |                             +---------------------------+
           |                             |   Embedded Input          |
           |                             +---------------------------+
           |                                          |
           |                                |ltm_add|   |ltm_search|      
           v                                          v
+---------------------------+           +---------------------------+
|    Working Memory         |<----------|   Long-term Memory (LTM)  |
+---------------------------+           +---------------------------+
           |                                          ^
           v                                          |
+---------------------------+                         |
|      Chat-response        |-------------------------|
+---------------------------+
           |  
           v
+---------------------------+                            
|        Output             |
+---------------------------+
```


In [None]:
class FifoVectorChat(FifoMemory,Chat):
    """Chat that combines a FIFO short-term memory with a vector long-term memory.

    The total token budget ``max_memory`` is split between the two memories;
    every prompt is recorded in ``self.prompt_list``.
    """

    def __init__(self, system_prompt= None , name= 'fifo_vector_memory', max_memory = 2048, longterm_thread = None, longterm_frac = 0.5):
        """Split the budget, then initialise the memory side and the chat side.

        Args:
            system_prompt: system prompt to use; when ``None`` the value of
                ``get_default_system_prompt()`` is used instead.
            name: forwarded to the first base initialiser.
            max_memory: total budget shared by short-term and long-term memory.
            longterm_thread: existing long-term memory to reuse; when ``None``
                a new ``VectorMemory`` is created.
            longterm_frac: share of ``max_memory`` given to the long-term
                memory; only used when ``longterm_thread`` is ``None``.
        """
        self.total_max_memory = max_memory
        self.setup_longterm_memory(longterm_thread, max_memory , longterm_frac)

        super().__init__(name, self.max_short_term_memory, self.longterm_thread)
        # An explicit system_prompt has to be stored as well: the call to
        # Chat.__init__ below reads self.system_prompt in both cases.
        if system_prompt is None:
            self.system_prompt = self.get_default_system_prompt()
        else:
            self.system_prompt = system_prompt
        Chat.__init__(self, self.system_prompt)
        # query() builds its prompt through this hook.
        self.prompt_func = self.fifovector_memory_prompt
        self.prompt_list = []

    def setup_longterm_memory(self, longterm_thread, max_memory , longterm_frac):
        """Derive the short-term / long-term budgets and set ``self.longterm_thread``.

        Without a ``longterm_thread`` the budget is split according to
        ``longterm_frac`` and a fresh ``VectorMemory`` is created with the
        long-term share as its ``max_context``. With one, its ``max_context``
        is taken as the long-term budget, the short-term memory receives the
        remainder of ``self.total_max_memory``, and ``longterm_frac`` is
        recomputed from these values.

        Args:
            longterm_thread: existing long-term memory, or ``None``.
            max_memory: total budget to split.
            longterm_frac: long-term share, used only when no thread is given.
        """
        if longterm_thread is None:
            self.longterm_frac = longterm_frac
            self.max_short_term_memory =int(max_memory * (1-self.longterm_frac))
            # Give the long-term side the remainder so both parts sum to max_memory.
            self.max_longterm_memory = max_memory - self.max_short_term_memory
            self.longterm_thread = VectorMemory(None, 'longterm_memory',max_context = self.max_longterm_memory)
        else:
            self.longterm_thread = longterm_thread
            self.max_longterm_memory = self.longterm_thread.max_context
            self.max_short_term_memory = self.total_max_memory - self.max_longterm_memory
            self.longterm_frac = self.max_longterm_memory/self.total_max_memory

    def fifovector_memory_prompt(self, question, k = 10):
        """Build the prompt from system prompt, long-term memory, short-term memory and question.

        If the long-term memory is non-empty and its ``total_tokens`` fit into
        ``max_longterm_memory`` it is included entirely; if it is larger, the
        result of ``get_token_bound_prompt(question, k=k)`` is used instead.
        The complete short-term memory thread is always included.

        Args:
            question: the user's question text.
            k: number of candidates requested from the long-term memory.

        Returns:
            tuple: ``(prompt, marked_question)``; ``marked_question`` is a
            separately marked copy of the formatted question.
        """
        prompt = [mark_system(self.system_prompt)]
        # check if something is in the long-term memory and whether it fits the budget
        if len(self.longterm_thread.memory_thread) > 0 and self.longterm_thread.total_tokens <= self.max_longterm_memory:
            # add all the messages of the long-term memory
            prompt+=self.longterm_thread.memory_thread
        elif len(self.longterm_thread.memory_thread) > 0 and self.longterm_thread.total_tokens > self.max_longterm_memory:
            # too large: fall back to retrieval bounded by the long-term budget
            prompt += self.longterm_thread.get_token_bound_prompt(question, k =k)

        # the short-term memory is added completely
        prompt+=self.memory_thread
        prompt+=[mark_question(self.user_prompt.format(question=question))]
        return prompt, mark_question(self.user_prompt.format(question=question))

    def query(self, question):
        """Send ``question`` to the chat API and store the exchange in memory.

        The prompt is appended to ``self.prompt_list`` before the API call.

        Args:
            question: the user's question text.

        Returns:
            The marked answer on success; otherwise the content of the first
            choice of the returned response.
        """
        # compose the prompt for the chat-gpt api
        prompt, marked_question = self.prompt_func(question)
        self.prompt_list.append(prompt)
        # call the chat-gpt api
        response, success = self.chat_response(prompt)
        if success:
            # add the question and the answer to the chat history
            answer = self.get_mark_from_response(response)
            self.add_message(marked_question)
            self.add_message(answer)
            return answer
        else:
            return response.choices[0].message.content
    


In [None]:
# class FifoVectorPandaChat(FifoVectorChat):
#     """ fifo vector chat with additional panda indexes as external sources of information"""
#     def __init__(self, pandaindex, system_prompt= None , name= 'fifo_memory', max_memory = 2048, longterm_thread = None, longterm_frac = 0.5):
#         super().__init__(system_prompt, name, max_memory, longterm_thread, longterm_frac)
#         self.pandaindex = pandaindex
        

In [None]:
class PandaChat(PandaIndex, Chat):
    """Combine a chat with a panda index so that chat responses are based on the index content."""

    def __init__(self, pandaframe, max_context, max_output_tokens, index_description=None, 
                 columns=None, name='panda_index', save_path=None, in_place=True, embeddings_col=None):
        """Initialise the index side, then the chat side.

        Args:
            pandaframe, columns, name, save_path, in_place, embeddings_col:
                forwarded to ``PandaIndex.__init__``.
            max_context: token budget for the hints added to a prompt.
            max_output_tokens: forwarded to ``Chat.__init__``.
            index_description: text inserted into the default system prompt.
        """
        # Initialize PandaIndex
        PandaIndex.__init__(self, pandaframe, columns, name, save_path, in_place, embeddings_col)

        self.max_context = max_context

        # Initialize Chat
        self.tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')
        Chat.__init__(self, max_output_tokens=max_output_tokens)

        # query() builds its prompt through this hook.
        self.prompt_func = self.panda_prompt
        self.system_prompt = self.get_default_panda_prompt(index_description)

    def get_default_panda_prompt(self, index_description):
        """Return the default system prompt with ``index_description`` filled in."""
        system_prompt = """You are a Chatbot assistant that can use a external knowledge base to answer questions.
        The user will always add hints from the external knowledge base. 
        You express your thoughts using princpled reasoning and always pay attention to the
         hints.  Your knowledge base description is {index_descrpiton}:"""
        return system_prompt.format(index_descrpiton = index_description)
    
    def get_hint_prompt(self, question):
        """Return the user message: the retrieved hints followed by the question.

        Args:
            question: the user's question text, also used as retrieval query.

        Returns:
            str: hint introduction, the hints, then the question on its own line.
        """
        hints = self.get_token_bound_hints(question, k = 10)
        hints_string = "\n ".join(hints)
        prefix= "I am going to ask you a question and you should use the hints to answer it. The hints are:\n{hints_string}"
        # Start the question on a new line so it does not run into the last hint.
        questionintro ="\nThe question is: {question}"
        return prefix.format(hints_string = hints_string) + questionintro.format(question = question)
    
    def panda_prompt(self, question):
        """Build the prompt: system prompt followed by the hint-augmented question.

        Args:
            question: the user's question text.

        Returns:
            tuple: ``(prompt, marked_question)``; note that ``marked_question``
            wraps the plain question, not the hint-augmented message.
        """
        prompt = [mark_system(self.system_prompt)]
        prompt += [mark_question(self.get_hint_prompt(question))]
        return prompt, mark_question(question)

    def get_token_bound_hints(self, query, k = 10):
        """Return retrieved hints whose combined token count fits ``self.max_context``.

        Up to ``k`` results of ``faiss_query`` are visited in the order
        returned; a hint is kept when it still fits into the remaining budget
        and skipped otherwise (later, shorter hints may still be kept).

        Args:
            query: text used for retrieval.
            k: maximum number of candidates to request.

        Returns:
            list: the selected hints; empty when the index holds no values.
        """
        # Bound before the emptiness check so an empty index yields [] instead
        # of an UnboundLocalError at the return statement.
        top_k_hint = []
        if len(self.values) == 0:
            return top_k_hint

        context_tokens = 0
        top_k = self.faiss_query(query, k = min(k, len(self.values)))
        for hint in top_k:
            # length of the hint in tokens
            message_tokens = len(self.tokenizer.encode(hint))
            if context_tokens + message_tokens <= self.max_context:
                top_k_hint.append(hint)
                context_tokens += message_tokens
        return top_k_hint

In [None]:
# Here we write a FifoVectorChat that uses a PandaIndex as an external source of information. We cannot subclass PandaIndex
# because many methods overlap.

class FifoVectorPandaChat(FifoVectorChat):
    """FifoVectorChat that appends hints from a ``PandaChat`` index to every question."""

    def __init__(self,pandaframe,columns,embeddings_col = None, system_prompt=None, name='fifovec_panda_memory',max_context=4000, max_memory=2048, longterm_thread=None, longterm_frac=0.5):
        """Initialise the FIFO/vector chat and attach the panda index.

        Args:
            pandaframe, columns, embeddings_col, max_context: forwarded to the
                ``PandaChat`` stored in ``self.pandaindex``.
            system_prompt, name, max_memory, longterm_thread, longterm_frac:
                forwarded to ``FifoVectorChat.__init__``.
        """
        super().__init__(system_prompt, name, max_memory, longterm_thread, longterm_frac)
        self.pandaindex = PandaChat(
            pandaframe,
            columns = columns,
            max_context = max_context,
            max_output_tokens = 100,
            index_description = "alice_pandraframe",
            embeddings_col = embeddings_col,
        )
        # query() builds its prompt through this hook.
        self.prompt_func = self.memory_panda_prompt
        self.max_output_tokens = 100
        self.model = "gpt-4"

    def memory_panda_prompt(self, question, k = 10):
        """Build the prompt from system prompt, both memories and the hint-augmented question.

        A non-empty long-term memory is included entirely when its
        ``total_tokens`` fit into ``max_longterm_memory``; otherwise the result
        of ``get_token_bound_prompt(question, k=k)`` is used. The complete
        short-term memory follows, and the last message is the hint prompt
        produced by ``self.pandaindex``.

        Args:
            question: the user's question text.
            k: number of candidates requested from the long-term memory.

        Returns:
            tuple: ``(prompt, marked_question)``; ``marked_question`` wraps the
            question formatted with ``self.user_prompt``, without the hints.
        """
        longterm = self.longterm_thread
        messages = [mark_system(self.system_prompt)]

        if len(longterm.memory_thread) > 0:
            if longterm.total_tokens <= self.max_longterm_memory:
                # Everything fits: take the long-term memory as it is.
                messages += longterm.memory_thread
            elif longterm.total_tokens > self.max_longterm_memory:
                # Too large: retrieve only what is relevant to the question.
                messages += longterm.get_token_bound_prompt(question, k = k)

        # The short-term (FIFO) memory is always added completely.
        messages += self.memory_thread
        hint_message = mark_question(self.pandaindex.get_hint_prompt(question))
        messages += [hint_message]

        marked_question = mark_question(self.user_prompt.format(question=question))
        return messages, marked_question
    

In [None]:
# class MasterChat(FifoVectorChat):
#     def __init__(self, children_system_prompts, vindex_list, name='master_chat', max_memory=2048, longterm_thread=None, longterm_frac=0.5):
#         super().__init__(name=name, max_memory=max_memory, longterm_thread=longterm_thread, longterm_frac=longterm_frac)

#         self.children_chats = {}
#         for child_name, system_prompt in children_system_prompts.items():
#             self.children_chats[child_name] = FifoVectorChat(system_prompt=system_prompt)

#         self.indexes = {vindex.name: vindex for vindex in vindex_list}

#     def get_default_controflow_prompt(self):
#         child_descriptions = ""
#         for child_name, child_chat in self.children_chats.items():
#             child_descriptions += f"\n- {child_name}: {child_chat.system_prompt}"

#         index_descriptions = ""
#         for index_name, index in self.indexes.items():
#             index_descriptions += f"\n- {index_name}: {index.description}"

#         system_prompt = f"""You are a MasterChat assistant that has access to multiple expert chat assistants, each with its own memory and vector index. 
#         Your role is to manage these expert assistants and determine the best sequence of sub-chats to answer the user's questions. 
#         Your memory is composed of a FifoVectorChat, which means you have a short-term memory based on the order of messages and a long-term memory based on vector retrieval.

#     You have the following expert chat assistants available:
#     {child_descriptions}

#     You also have access to the following knowledge indexes:
#     {index_descriptions}

#     When answering questions, you can activate the appropriate expert chat assistants by using the following protocol:

#     1. Write "activate_agent" followed by a JSON object containing the "agent_name" key and the corresponding agent's name as the value, and the "question" key with the question as its value, like this: "activate_agent {{\"agent_name\": \"child1\", \"question\": \"What is the capital of France?\"}}".

#     2. Provide the activated agent with any relevant hints gathered from the knowledge indexes in the prompt  "activate_agent {{\"agent_name\": \"child1\", \"question\": \"What is the capital of France?, !hint: France is in Europe [source: WikipedaIndex] \"}}".

#     Remember to always consider the context and maintain a principled approach in your reasoning."""

#         return system_prompt


In [None]:
# Load the CSV file "alice.csv" from the working directory into a DataFrame.
df = pd.read_csv("alice.csv")

In [None]:
# Build the chat on top of the DataFrame, indexing the "TEXT" column.
# NOTE(review): "EMBEDDINGS" is presumably a column of precomputed embeddings - confirm against PandaIndex.
fvp = FifoVectorPandaChat(pandaframe = df, columns= "TEXT", embeddings_col="EMBEDDINGS" )


In [None]:
# NOTE(review): presumably launches a Gradio UI for the chat; gradio() is defined outside this section - confirm.
fvp.gradio()