LangChain: Memory
- ConversationBufferMemory
- ConversationBufferWindowMemory
- ConversationTokenBufferMemory
- ConversationSummaryBufferMemory

In [None]:
import openai
# openai.api_key = "API_KEY"

import os

# Name of the chat model used by every example in this notebook.
# An empty string is not a valid model identifier, so fall back to
# "gpt-3.5-turbo"; set the LLM_MODEL environment variable to override it.
llm_model = os.getenv("LLM_MODEL") or "gpt-3.5-turbo"

In [None]:
from langchain.chat_models import ChatOpenAI

# Chat model shared by the memory examples below; temperature is pinned to 0.0
llm = ChatOpenAI(model=llm_model, temperature=0.0)

Conversation Buffer Memory

In [None]:
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()
conversation = ConversationChain(
    llm = llm,
    memory = memory,
    verbose = True
)

# A notebook cell only renders its last bare expression, so print every
# reply explicitly; otherwise the answers below would be silently discarded.
for user_input in ("Hi, my name is Andrew", "What is 1+1?", "What is my name?"):
    print(conversation.predict(input=user_input))

# It will remember your name

# see saved historical chats
print(memory.buffer)
print(memory.load_memory_variables({}))

# append new chats, then show the memory again so the added exchange is visible
memory.save_context({"input": "Hi"}, {"output":"What's up"})
print(memory.load_memory_variables({}))

Conversation Buffer Window Memory

In [None]:
from langchain.memory import ConversationBufferWindowMemory

# k = 1, one last exchange (Human, AI)
memory = ConversationBufferWindowMemory(k = 1)
conversation = ConversationChain(
    llm = llm,
    memory = memory,
    verbose = False
)

# Print every reply: a notebook cell only renders its last bare expression,
# so the first two answers would otherwise never be shown.
for user_input in ("Hi, my name is Andrew", "What is 1+1?", "What is my name?"):
    print(conversation.predict(input=user_input))

# It will not remember your name

Conversation Token Buffer Memory

In [None]:
# limit the maximum number of tokens kept in memory; this maps more directly to the cost
# different LLMs count tokens in different ways

from langchain.memory import ConversationTokenBufferMemory

# Token-limited memory: built with the shared llm and a 50-token cap
memory = ConversationTokenBufferMemory(max_token_limit=50, llm=llm)

# Record three (human input, AI output) exchanges, in this order
exchanges = [
    ("AI is what?!", "Amazing!"),
    ("Backpropagation is what?", "Beautiful!"),
    ("Chatbots are what?", "Charming!"),
]
for human_text, ai_text in exchanges:
    memory.save_context({"input": human_text}, {"output": ai_text})

# Last expression of the cell: what the memory exposes after the three saves
memory.load_memory_variables({})

Conversation Summary Buffer Memory

In [None]:
# use the LLM to summarize the conversation so far & use as memory
# summarized as a System message. e.g. {'history': "System: Human and AI ...... Human asks ....\nAI: ...."}
# Other memories: {'history':"AI: Beautiful!\nHuman: Chatbots are what?\nAI: Charming!"}

# It tries to keep the messages stored verbatim up to the token limit we have specified (if max_token_limit = 100, the original messages will be kept for <= 100 tokens).
# Anything beyond that limit is summarized by the LLM instead of being stored verbatim.

# e.g. usecase: search on internet, & want to keep facts
# others => suitable for chatbots

In [None]:
from langchain.memory import ConversationSummaryBufferMemory

# Long multi-sentence text, built from adjacent string literals.
# Every piece except the last ends with a space, so the result is one
# single-line string with no embedded newlines.
schedule = (
    "There is a meeting at 8am with your product team. "
    "You will need your powerpoint presentation prepared. "
    "9am-12pm have time to work on your LangChain "
    "project which will go quickly because Langchain is such a powerful tool. "
    "At Noon, lunch at the italian resturant with a customer who is driving "
    "from over an hour away to meet you to understand the latest in AI. "
    "Be sure to bring your laptop to show the latest LLM demo."
)

memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=100)
memory.save_context({"input": "Hello"}, {"output": "What's up"})
memory.save_context({"input": "Not much, just hanging"},
                    {"output": "Cool"})
# schedule is already a str, so it is passed directly rather than through an f-string
memory.save_context({"input": "What is on the schedule today?"},
                    {"output": schedule})

# Memory contents before the chain is used. Printed explicitly because a
# notebook cell only renders its last bare expression.
print(memory.load_memory_variables({}))

conversation = ConversationChain(
    llm=llm,
    memory = memory,
    verbose=True
)

# Print the reply so it is not discarded mid-cell
print(conversation.predict(input="What would be a good demo to show?"))

# Memory contents after the chain call (rendered as the cell's output)
memory.load_memory_variables({})

Additional Memory Types
- Vector data memory (stored in a vector db & retrieves the most relevant blocks of text)
- Entity memories (focus on details about specific entities)

- can also use multiple memories at the same time, e.g. Conversation + Entity memory to recall individuals

- can also store conversation in a conventional database (e.g. key-value store or SQL)