In [None]:
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationChain
from langchain.vectorstores import FAISS
from langchain.chains.conversation.memory import (
    ConversationBufferMemory,
    ConversationBufferWindowMemory,
    ConversationSummaryMemory,
    ConversationSummaryBufferMemory,
    ConversationKGMemory,
    CombinedMemory
)
from langchain.memory.prompt import ENTITY_MEMORY_CONVERSATION_TEMPLATE
from langchain.memory import (
    ConversationEntityMemory,
    VectorStoreRetrieverMemory
)
from langchain.docstore import InMemoryDocstore
from langchain.callbacks import get_openai_callback

import faiss

## Setting the LLM

In [None]:
# Read the OpenAI API key from a local file. The content is stripped because a
# trailing newline (which most editors append on save) would otherwise become
# part of the credential that is handed to the OpenAI clients below.
with open("openai_api.txt", "r", encoding="utf-8") as f:
    OPENAI_API = f.read().strip()

# Completion model shared by the chains and LLM-backed memories in this notebook.
llm = OpenAI(
    model_name="gpt-3.5-turbo-instruct",
    openai_api_key=OPENAI_API,
)

# Embedding model; its `embed_query` is used further down for the FAISS store.
embedding_llm = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=OPENAI_API,
)

## Types of Conversational Memory

We can use several types of conversational memory with the `ConversationChain`. Each has its own parameters and return types, and each is useful in different scenarios. They all work by modifying the text passed to the `history` parameter.

### `ConversationBufferMemory`

This is the most straightforward conversational memory in LangChain: it stores the raw `input` from the user and the `response` from the AI.

The conversational memory objects accept an `ai_prefix` argument, which denotes what the model is called (e.g. "AI Assistant"), and a `human_prefix` argument, which denotes what the user is to the AI (e.g. "Friend").

In [None]:
# Chain whose memory keeps the raw conversation history.
conversation_buf = ConversationChain(
    llm=llm,
    memory=ConversationBufferMemory(),
)

# Print whatever the chain yields on iteration, one item per line
# (presumably its configured fields -- confirm for the LangChain version in use).
for field in conversation_buf:
    print(field)

In [None]:
conversation_buf("Good morning AI!")

In [None]:
conversation_buf("How are you?")

Be careful with token usage when the conversation becomes long:

In [None]:
## Helper that reports how many tokens are spent on each query

def count_tokens(chain, query):
    """Run ``query`` through ``chain`` and report the tokens it consumed.

    The call is made inside ``get_openai_callback()`` and the callback's
    ``total_tokens`` is printed before the context exits.

    Args:
        chain: Object exposing ``run(query)``.
        query: Value forwarded to ``chain.run``.

    Returns:
        Whatever ``chain.run(query)`` returned.
    """
    with get_openai_callback() as usage:
        answer = chain.run(query)
        print(f"[INFO] Spent a total of {usage.total_tokens} tokens")

    return answer

In [None]:
count_tokens(conversation_buf, "My interest here is to explore the potential of integrating Large Language Models with external knowledge")

In [None]:
count_tokens(conversation_buf, "I just want to analyze the different possibilities. What can you think of?")

In [None]:
count_tokens(conversation_buf, "Which data source types could be used to give context to the model?")

In [None]:
count_tokens(conversation_buf, "What is my aim again?")

In [None]:
## Seeing how this memory is stored in the Buffer

print(conversation_buf.memory.buffer)

Saving everything gives the model the maximum amount of information, but it slows response times and increases cost. In addition, in long conversations the model cannot remember anything past the LLM token limit (4096 tokens for `gpt-3.5-turbo-instruct` and `gpt-3.5-turbo`).

### `ConversationSummaryMemory`

This type of memory creates a summary of the conversation over time. This can be useful for condensing information from the conversation over time. Conversation summary memory summarizes the conversation as it happens and stores the current summary in memory. This memory can then be used to inject the summary of the conversation so far into a prompt/chain. This memory is most useful for longer conversations, where keeping the past message history in the prompt verbatim would take up too many tokens.

When using `ConversationSummaryMemory`, we need to pass an `LLM` to the object because the summarization is powered by an `LLM`.

In [None]:
# Chain whose memory keeps a running summary; the summary memory is given
# the same LLM so it can do the summarizing.
conversation_sum = ConversationChain(
    llm=llm,
    memory=ConversationSummaryMemory(llm=llm),
)

# Print whatever the chain yields on iteration, one item per line
# (presumably its configured fields -- confirm for the LangChain version in use).
for field in conversation_sum:
    print(field)

In [None]:
print(conversation_sum.memory.prompt.template)

In [None]:
count_tokens(conversation_sum, "Good morning AI!")

In [None]:
count_tokens(conversation_sum, "My interest here is to explore the potential of integrating Large Language Models with external knowledge")

In [None]:
count_tokens(conversation_sum, "I just want to analyze the different possibilities. What can you think of?")

In [None]:
count_tokens(conversation_sum, "Which data source types could be used to give context to the model?")

In [None]:
count_tokens(conversation_sum, "What is my aim again?")

In [None]:
print(conversation_sum.memory.buffer)

The advantage of using the summarizer is that it shortens the number of tokens used for long conversations. The downsides are that the memorization of the conversation history is wholly reliant on the summarization ability of the intermediate summarization LLM, and that the summarization itself also requires token usage.

### `ConversationBufferWindowMemory`

It keeps a list of the interactions of the conversation over time. It only uses the last K interactions. This can be useful for keeping a sliding window of the most recent interactions, so the buffer does not get too large.

With `k=1` the chain will remember the single latest interaction between the human and AI

In [None]:
# Chain whose memory is a sliding window over the most recent k=1 interaction.
conversation_bufw = ConversationChain(
    llm=llm,
    memory=ConversationBufferWindowMemory(k=1),
)

# Print whatever the chain yields on iteration, one item per line
# (presumably its configured fields -- confirm for the LangChain version in use).
for field in conversation_bufw:
    print(field)

In [None]:
count_tokens(conversation_bufw, "Good morning AI!")

In [None]:
count_tokens(conversation_bufw, "My interest here is to explore the potential of integrating Large Language Models with external knowledge")

In [None]:
count_tokens(conversation_bufw, "I just want to analyze the different possibilities. What can you think of?")

In [None]:
count_tokens(conversation_bufw, "Which data source types could be used to give context to the model?")

In [None]:
count_tokens(conversation_bufw, "What is my aim again?")

In [None]:
# `load_memory_variables` expects a mapping of chain inputs, so pass an empty
# dict (not a list) when reading back the windowed history.
conversation_bufw.memory.load_memory_variables(inputs={})["history"]

If we only need memory of recent interactions, this is a great option. However, for a mix of both distant and recent interactions, there are other options.

### `ConversationSummaryBufferMemory`

It combines the two ideas: `ConversationSummaryMemory` and `ConversationBufferWindowMemory`. It keeps a buffer of recent interactions in memory, but rather than just completely flushing old interactions it compiles them into a summary and uses both. It uses token length rather than number of interactions to determine when to flush interactions.

When applying this to our earlier conversation, we can set `max_token_limit` to a small number and yet the LLM can remember our earlier “aim”.

Naturally, the pros and cons of this component are a mix of the earlier components on which this is based.

Although it requires more tweaking of what to summarize and what to maintain within the buffer window, `ConversationSummaryBufferMemory` gives us plenty of flexibility and is the only memory type (so far) that allows us to remember distant interactions while storing the most recent interactions in their raw form.

In [None]:
# Chain whose memory combines a summary with a raw buffer, configured with a
# 150-token limit.
summary_buffer_memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=150)
conversation_sum_bufw = ConversationChain(llm=llm, memory=summary_buffer_memory)

In [None]:
count_tokens(conversation_sum_bufw, "Good morning AI!")

In [None]:
count_tokens(conversation_sum_bufw, "My interest here is to explore the potential of integrating Large Language Models with external knowledge")

In [None]:
count_tokens(conversation_sum_bufw, "I just want to analyze the different possibilities. What can you think of?")

In [None]:
count_tokens(conversation_sum_bufw, "Which data source types could be used to give context to the model?")

In [None]:
count_tokens(conversation_sum_bufw, "What is my aim again?")

In [None]:
## Short-term Memory

conversation_sum_bufw.memory.chat_memory.messages

In [None]:
## Long-term Memory

conversation_sum_bufw.memory.moving_summary_buffer

### `ConversationEntityMemory`

Entity memory remembers given facts about specific entities in a conversation. It extracts information on entities (using an LLM) and builds up its knowledge about that entity over time (also using an LLM).

In [None]:
# Chain using entity memory together with the entity-memory prompt template;
# verbose output is switched on.
conversation_entity = ConversationChain(
    llm=llm,
    memory=ConversationEntityMemory(llm=llm),
    prompt=ENTITY_MEMORY_CONVERSATION_TEMPLATE,
    verbose=True,
)

In [None]:
conversation_entity.predict(input="Deven & Sam are working on a hackathon project")

In [None]:
conversation_entity.predict(input="They are trying to add more complex memory structures to Langchain")

In [None]:
conversation_entity.predict(input="They are adding in a key-value store for entities mentioned so far in the conversation.")

In [None]:
conversation_entity.predict(input="What do you know about Deven & Sam?")

In [None]:
## Displaying the Entities Captured by the LLM

conversation_entity.memory.entity_store.store

In [None]:
## The Messages that the LLM has Saved

for k in conversation_entity.memory.chat_memory.messages:
    print(k)

### `VectorStoreRetrieverMemory`

`VectorStoreRetrieverMemory` stores memories in a VectorDB and queries the top-K most “salient” docs every time it is called. This differs from most of the other Memory classes in that it doesn't explicitly track the order of interactions. In this case, the `docs` are previous conversation snippets.

In [None]:
## Initialize the Vectorstore

# Function used by the store to turn a piece of text into a vector.
embedding_fn = embedding_llm.embed_query

# Flat L2 index sized to the embedding vectors (1536 is assumed to match
# text-embedding-ada-002 -- confirm if the embedding model changes).
embedding_size = 1536
index = faiss.IndexFlatL2(embedding_size)

# Start from an empty in-memory docstore and an empty index-to-id mapping.
vectorstore = FAISS(
    embedding_function=embedding_fn,
    index=index,
    docstore=InMemoryDocstore({}),
    index_to_docstore_id={},
)

In [None]:
## Initializing the Memory Object

# The retriever is asked for a single result per query (k=1).
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
memory = VectorStoreRetrieverMemory(retriever=retriever)

## Seed the memory with a few facts. When added to an agent, the memory object
## can likewise save pertinent information from conversations or used tools.

memory.save_context({"input": "My favorite food is pizza"}, {"output": "thats good to know"})
memory.save_context({"input": "My favorite sport is soccer"}, {"output": "..."})
# The original seeded sentence was missing its verb ("I don't the Celtics").
memory.save_context({"input": "I don't like the Celtics"}, {"output": "ok"})

In [None]:
## Creating the Conversation Chain

_DEFAULT_TEMPLATE = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Relevant pieces of previous conversation:
{history}

(You do not need to use these pieces of information if not relevant)

Current conversation:
Human: {input}
AI:"""
# Prompt built from the template above, with `history` and `input` as its
# two placeholders.
PROMPT = PromptTemplate(
    input_variables=["history", "input"],
    template=_DEFAULT_TEMPLATE,
)

# Chain wired to the custom prompt and the current `memory` object;
# verbose output is switched on.
conversation_with_summary = ConversationChain(
    llm=llm,
    memory=memory,
    prompt=PROMPT,
    verbose=True,
)

In [None]:
conversation_with_summary.predict(input="Hi, my name is Perry, what's up?")

In [None]:
conversation_with_summary.predict(input="what's my favorite sport?")

In [None]:
conversation_with_summary.predict(input="Whats my favorite food")

In [None]:
conversation_with_summary.predict(input="What's my name?")

### `ConversationKGMemory`

This type of memory uses a knowledge graph to recreate memory.

In [None]:
template = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. 
If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate.

Relevant Information:

{history}

Conversation:
Human: {input}
AI:"""

# Prompt built from the template above, with `history` and `input` as its
# two placeholders.
prompt = PromptTemplate(
    input_variables=["history", "input"],
    template=template,
)

# Chain backed by knowledge-graph memory (which is given the same LLM);
# verbose output is switched on.
conversation_with_kg = ConversationChain(
    llm=llm,
    memory=ConversationKGMemory(llm=llm),
    prompt=prompt,
    verbose=True,
)

In [None]:
conversation_with_kg.predict(input="Hi, what's up?")

In [None]:
conversation_with_kg.predict(input="My name is James and I'm helping Will. He's an engineer.")

In [None]:
conversation_with_kg.predict(input="What do you know about Will?")

## Multiple Memory Classes

We can use multiple memory classes in the same chain. To combine them, we initialize the `CombinedMemory` class with the individual memories and pass it to the chain.

In [None]:
## Memory 1: raw buffer, exposed to the prompt under `chat_history_lines`
conv_memory = ConversationBufferMemory(
    input_key="input",
    memory_key="chat_history_lines",
)

## Memory 2: LLM-generated summary, reading the same `input` key
summary_memory = ConversationSummaryMemory(
    input_key="input",
    llm=llm,
)

## Combined: both memories are handed to a single CombinedMemory
memory = CombinedMemory(
    memories=[conv_memory, summary_memory],
)

## Creating the Prompt
_DEFAULT_TEMPLATE = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Summary of conversation:
{history}
Current conversation:
{chat_history_lines}
Human: {input}
AI:"""
# Prompt with three placeholders: `history`, `chat_history_lines` and `input`.
PROMPT = PromptTemplate(
    template=_DEFAULT_TEMPLATE,
    input_variables=["history", "input", "chat_history_lines"],
)

## Creating the Chain
# Wired to the custom prompt and the current `memory` object; verbose output
# is switched on.
conversation = ConversationChain(
    llm=llm,
    prompt=PROMPT,
    memory=memory,
    verbose=True,
)

In [None]:
conversation.run("Hi!")

In [None]:
conversation.run("Can you tell me a joke?")