In [2]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')

#Langsmith tracking
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACKING_V2'] = "true"
os.environ['LANGCHAIN_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')

In [5]:
## Data Ingestion - Scrap data from the website
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://docs.langchain.com/langsmith/data-storage-and-privacy")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x20d64a62510>

In [None]:
# Load into a document
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/data-storage-and-privacy', 'title': 'Data storage and privacy - Docs by LangChain', 'language': 'en'}, page_content="Data storage and privacy - Docs by LangChainSkip to main contentWe've raised a $125M Series B to build the platform for agent engineering. Read more.Docs by LangChain home pageLangSmithSearch...⌘KGitHubTry LangSmithTry LangSmithSearch...NavigationData managementData storage and privacyGet startedObservabilityEvaluationPrompt engineeringDeploymentAgent BuilderHostingOverviewPlansCreate an account and API keyAccount administrationOverviewSet up a workspaceManage organizations using the APIManage billingSet up resource tagsUser managementReferenceLangSmith Python SDKLangSmith JS/TS SDKLangGraph Python SDKLangGraph JS/TS SDKLangSmith APIAPI reference for LangSmith DeploymentAdditional resourcesReleases & changelogsData managementData storage and privacyData purging for complianceScalability & resilienceAuthe

In [7]:
# Split document into chunks, there is context size so we need to divide into chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents = text_splitter.split_documents(docs)
documents


[Document(metadata={'source': 'https://docs.langchain.com/langsmith/data-storage-and-privacy', 'title': 'Data storage and privacy - Docs by LangChain', 'language': 'en'}, page_content="Data storage and privacy - Docs by LangChainSkip to main contentWe've raised a $125M Series B to build the platform for agent engineering. Read more.Docs by LangChain home pageLangSmithSearch...⌘KGitHubTry LangSmithTry LangSmithSearch...NavigationData managementData storage and privacyGet startedObservabilityEvaluationPrompt engineeringDeploymentAgent BuilderHostingOverviewPlansCreate an account and API keyAccount administrationOverviewSet up a workspaceManage organizations using the APIManage billingSet up resource tagsUser managementReferenceLangSmith Python SDKLangSmith JS/TS SDKLangGraph Python SDKLangGraph JS/TS SDKLangSmith APIAPI reference for LangSmith DeploymentAdditional resourcesReleases & changelogsData managementData storage and privacyData purging for complianceScalability & resilienceAuthe

In [8]:
# Embed the documents into vectors
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [9]:
# Store into a VectorDB
from langchain_community.vectorstores import FAISS
vectorstoredb = FAISS.from_documents(documents,embeddings)

In [11]:
# Query the vectorDB
query = "langgraph up builds your local package into a Docker image and runs the server as the data plane consisting of three containers"
result = vectorstoredb.similarity_search(query)
print(result[0].page_content)

​Standalone Server
langgraph up builds your local package into a Docker image and runs the server as the data plane consisting of three containers: the API server, a PostgreSQL container, and a Redis container. All persistent data (checkpoints, assistants, etc.) are stored in the PostgreSQL database. Redis is used as a pubsub connection for real-time streaming of events. You can encrypt all checkpoints before saving to the database by setting a valid LANGGRAPH_AES_KEY environment variable. You can also specify TTLs for checkpoints and cross-thread memories in langgraph.json to control how long data is stored. All persisted threads, memories, and other data can be deleted via the relevant API endpoints.
Additional API calls are made to confirm that the server has a valid license and to track the number of executed runs and tasks. Periodically, the API server validates the provided license key (or API key).


In [12]:
# Query the vectorDB
query = "langgraph three containers"
result = vectorstoredb.similarity_search(query)
print(result[0].page_content)

​Standalone Server
langgraph up builds your local package into a Docker image and runs the server as the data plane consisting of three containers: the API server, a PostgreSQL container, and a Redis container. All persistent data (checkpoints, assistants, etc.) are stored in the PostgreSQL database. Redis is used as a pubsub connection for real-time streaming of events. You can encrypt all checkpoints before saving to the database by setting a valid LANGGRAPH_AES_KEY environment variable. You can also specify TTLs for checkpoints and cross-thread memories in langgraph.json to control how long data is stored. All persisted threads, memories, and other data can be deleted via the relevant API endpoints.
Additional API calls are made to confirm that the server has a valid license and to track the number of executed runs and tasks. Periodically, the API server validates the provided license key (or API key).


In [13]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model = "gpt-4o")

In [14]:
## Retrieval chain, Document chain
from langchain.chains.combine_documents import create_stuff_documents_chain

## Chatprompt template
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
"""
Answer the following question based only on the provided context:
<context>
{context}
</context>
"""
)

document_chain = create_stuff_documents_chain(llm,prompt)
document_chain


RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000020D7B66FE90>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000020D79EE1590>, root_client=<openai.OpenAI object at 0x0000020D7B7F2E10>, root_async_client=<openai.AsyncOpenAI object at 0x0000020D7B66EAD0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'

In [16]:
from langchain_core.documents import Document

document_chain.invoke(
{"input": "langgraph up builds your local package into a Docker image and runs the server as the data plane consisting of three containers",
"context": [Document(page_content = "langgraph up builds your local package into a Docker image and runs the server as the data plane consisting of three containers: the API server, a PostgreSQL container, and a Redis container. All persistent data (checkpoints, assistants, etc.) are stored in the PostgreSQL database. Redis is used as a pubsub connection for real-time streaming of events. You can encrypt all checkpoints before saving to the database by setting a valid LANGGRAPH_AES_KEY environment variable. You can also specify TTLs for checkpoints and cross-thread memories in langgraph.json to control how long data is stored. All persisted threads, memories, and other data can be deleted via the relevant API endpoints.")]
}
)


'How can you encrypt checkpoints before saving them to the database in langgraph?\n\nYou can encrypt checkpoints before saving them to the database by setting a valid `LANGGRAPH_AES_KEY` environment variable.'

In [18]:
retriever = vectorstoredb.as_retriever()

In [19]:
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever,document_chain)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000020D75454790>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n'), additional_kwargs={})])
            | Chat

In [20]:
# Get the response from the LLM
response = retrieval_chain.invoke({"input":"langgraph up builds your local package into a Docker image and runs the server as the data plane consisting of three containers"})
response['answer']

'What are the primary functions of the PostgreSQL and Redis containers in the "Standalone Server"?\n\nThe PostgreSQL container in the "Standalone Server" is used to store all persistent data, including checkpoints, assistants, etc. The Redis container is used as a pub/sub connection for real-time streaming of events.'

In [21]:
response

{'input': 'langgraph up builds your local package into a Docker image and runs the server as the data plane consisting of three containers',
 'context': [Document(id='3783fb39-4a9e-4297-a399-1b744c8773c1', metadata={'source': 'https://docs.langchain.com/langsmith/data-storage-and-privacy', 'title': 'Data storage and privacy - Docs by LangChain', 'language': 'en'}, page_content='\u200bStandalone Server\nlanggraph up builds your local package into a Docker image and runs the server as the data plane consisting of three containers: the API server, a PostgreSQL container, and a Redis container. All persistent data (checkpoints, assistants, etc.) are stored in the PostgreSQL database. Redis is used as a pubsub connection for real-time streaming of events. You can encrypt all checkpoints before saving to the database by setting a valid LANGGRAPH_AES_KEY environment variable. You can also specify TTLs for checkpoints and cross-thread memories in langgraph.json to control how long data is stor

In [22]:
response['context']

[Document(id='3783fb39-4a9e-4297-a399-1b744c8773c1', metadata={'source': 'https://docs.langchain.com/langsmith/data-storage-and-privacy', 'title': 'Data storage and privacy - Docs by LangChain', 'language': 'en'}, page_content='\u200bStandalone Server\nlanggraph up builds your local package into a Docker image and runs the server as the data plane consisting of three containers: the API server, a PostgreSQL container, and a Redis container. All persistent data (checkpoints, assistants, etc.) are stored in the PostgreSQL database. Redis is used as a pubsub connection for real-time streaming of events. You can encrypt all checkpoints before saving to the database by setting a valid LANGGRAPH_AES_KEY environment variable. You can also specify TTLs for checkpoints and cross-thread memories in langgraph.json to control how long data is stored. All persisted threads, memories, and other data can be deleted via the relevant API endpoints.\nAdditional API calls are made to confirm that the ser

In [None]:
ChatPromptTemplate
document_chain
create_stuff_documents_chain
retrieval_chain
create_retrieval_chain