# Chat with memory

In [None]:
# Install the notebook's dependencies into the kernel's environment.
# NOTE(review): no versions are pinned and --upgrade is passed, so the installed
# releases may differ from run to run — consider pinning to known-good versions.
%pip install python-dotenv langchain langchain-openai chromadb docarray --upgrade --quiet

In [None]:
# Disabled legacy setup (explicit OpenAI key assignment), kept for reference:
#import os
#import openai
import sys
# Add the directory two levels up (relative to the working directory) to the module search path.
sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
# Assigning to `_` keeps the return value from being displayed as cell output.
_ = load_dotenv(find_dotenv()) # read local .env file

# Disabled: the key was previously read from the OPENAI_API_KEY environment variable.
#openai.api_key = os.environ['OPENAI_API_KEY']

### Debugging using LangSmith

In [None]:
import os
from uuid import uuid4

# Eight hex characters of a random UUID; embedded in the project name below
# so that each execution of this cell uses a distinct project name.
unique_id = uuid4().hex[:8]

# LangSmith tracing settings, supplied through environment variables.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": f"11 april - Chat with memory - {unique_id}",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)
#os.environ["LANGCHAIN_API_KEY"] = "<YOUR-API-KEY>"  # Update to your API key

# Imported after the environment is configured, matching the original cell order.
from langsmith import Client

client = Client()

In [None]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Directory of the persisted Chroma store (presumably produced by an earlier
# step of this project — verify it exists before running).
persist_directory = '../db/chroma-step2.3/'

# Embedding function handed to the vector store.
embedding = OpenAIEmbeddings()

vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

In [None]:
# Quick check of the vector store: request the 3 most similar documents
# for a sample question and display how many came back.
question = "What are major topics for this class?"
docs = vectordb.similarity_search(query=question, k=3)
len(docs)

In [None]:
# Import the chat model from langchain_openai — the same package this notebook
# already uses for OpenAIEmbeddings — instead of the deprecated
# langchain.chat_models path.
from langchain_openai import ChatOpenAI

# Chat model shared by the chains built later in the notebook.
llm = ChatOpenAI(temperature=0)

# Smoke test. invoke() replaces the deprecated predict(); it returns a message
# object, so .content is used to keep the cell output a plain string as before.
llm.invoke("Hello world!").content

In [None]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Prompt with two placeholders: {context} and {question}.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

# Retrieval QA chain over the vector store, using the custom prompt above and
# asking for the source documents to be returned with the answer.
question = "Is probability a class topic?"
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Run the chain and display only the answer text.
result = qa_chain.invoke({"query": question})
result["result"]

### Memory

Here, we're using a simple buffer memory, but there are many more memory types available in the `langchain.memory` module.
See more here: https://python.langchain.com/docs/modules/memory/types/

In [None]:
from langchain.memory import ConversationBufferMemory

# Conversation buffer for the chat chain. "chat_history" is the key under which
# the stored history can be referenced in prompts.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

### ConversationalRetrievalChain

This chain not only adds memory, but also a "rephrase"/"condense" step that combines the history with the last question to create a standalone question that will be matched against the documents.

In [None]:
from langchain.chains import ConversationalRetrievalChain

# Retriever view of the vector store, used by the conversational chain.
retriever = vectordb.as_retriever()

# Conversational chain wired to the LLM, the retriever, and the memory object.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)


In [None]:
# First conversational turn. The chain is run through .invoke(), the same way
# qa_chain is run earlier in this notebook, rather than by calling the chain
# object directly (deprecated).
question = "Is probability a class topic?"
result = qa.invoke({"question": question})

In [None]:
# Display the answer text from the chain's result.
result["answer"]

In [None]:
# Follow-up turn: "those" only makes sense in light of the previous exchange,
# so this exercises the chain's memory. The chain is run through .invoke()
# rather than by calling the chain object directly (deprecated).
question = "why are those prerequesites needed?"
result = qa.invoke({"question": question})

result["answer"]

### Check out what happens under the hood in LangSmith

https://smith.langchain.com/public/cbd48629-f2f9-4f13-816a-7b701bac9b83/r