In [1]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings    # replaces OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.llms import Ollama                  # replace ChatOpenAI

from langchain.chains import RetrievalQA
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
loader = WebBaseLoader(
    "https://docs.smith.langchain.com/administration/tutorials/manage_spend"
)

In [3]:
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x2b996490fa0>

In [4]:
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='\n\n\n\n\nOptimize tracing spend on LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentWe are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith. Join our team!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationTutorialsOptimize tracing spend on LangSmithHow-to GuidesSetupConceptual GuideSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceChangelogCloud architecture and scalabilityAuthz and AuthnAuthenticat

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
documents = text_splitter.split_documents(docs)

In [6]:
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Optimize tracing spend on LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith'),
 Document(metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Optimize tracing spend on LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentWe are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith. Join our team!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvalua

In [7]:
hf_embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2"
    # by default this uses CPU inference. If you have a GPU, HF will auto-detect.
)

  hf_embeddings = HuggingFaceEmbeddings(


In [8]:
vectorstoredb = FAISS.from_documents(
    documents,
    hf_embeddings
)

In [9]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x2b9f57b5570>

In [10]:
query = "LangSmith Traces usage is measured per workspace"
result=vectorstoredb.similarity_search(query)
result[0].page_content

"The first metric tracks all traces that you send to LangSmith. The second tracks all traces that also have our Extended 400 Day Data Retention.\nFor more details, see our data retention conceptual docs. Notice that these graphs look\nidentical, which will come into play later in the tutorial.\nLangSmith Traces usage is measured per workspace, because workspaces often represent development environments (as in our example),\nor teams within an organization. As a LangSmith administrator, we want to understand spend granularly per each of these units. In\nthis case where we just want to cut spend, we can focus on the environment responsible for the majority of costs first for the greatest savings.\nInvoices‚Äã\nWe understand what usage looks like in terms of traces, but we now need to translate that into spend. To do so,\nwe head to the Invoices tab. The first invoice that will appear on screen is a draft of your current month's\ninvoice, which shows your running spend thus far this month

In [13]:
llm = Ollama(
    model="llama2:7b"        # or "wizard-v1" or any Ollama-installed model
    # you can pass additional kwargs: temperature=…, max_tokens=… if desired.
)

  llm = Ollama(


In [14]:
from langchain.chains.combine_documents import create_stuff_documents_chain

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>


"""
)

document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
| Ollama(model='llama2:7b')
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [15]:
from langchain_core.documents import Document
document_chain.invoke({
    "input":"LangSmith Traces usage is measured per workspace",
    "context":[Document(page_content="LangSmith Traces usage is measured per workspace, because workspaces often represent development environments (as in our example), or teams within an organization. ")]
})

'Of course! Based on the provided context, the answer to the question "How is LangSmith Traces usage measured?" is:\n\nLangSmith Traces usage is measured per workspace.'

In [18]:
### Input--->Retriever--->vectorstoredb

vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x2b9f57b5570>

In [19]:
retriever=vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)


In [20]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002B9F57B5570>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
         

In [43]:
## Get the response form the LLM
response=retrieval_chain.invoke({"input":"Our gen AI application is called between 1.2-1.5 times per second"})
response['answer']

'Based on the provided context, the answer to the question "How many traces does LangSmith expect to receive per month with the given expected growth in load?" is:\n7,800,000 traces per month.'

In [44]:
response

{'input': 'Our gen AI application is called between 1.2-1.5 times per second',
 'context': [Document(id='a44c479d-4241-444b-a738-3a659942016a', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Current Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,\nmeaning we log around 100,000-130,000 traces per day\nExpected Growth in Load: We expect to double in size in the near future.\n\nFrom these assumptions, we can do a quick back-of-the-envelope calculation to get a good limit of:\nlimit = current_load_per_day * expected_growth * days/month      = 130,000 * 2 * 30      = 7,800,000 traces / month\nWe click on the edit icon on the right side of the table for our Pro

In [45]:
response['context']

[Document(id='a44c479d-4241-444b-a738-3a659942016a', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Current Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,\nmeaning we log around 100,000-130,000 traces per day\nExpected Growth in Load: We expect to double in size in the near future.\n\nFrom these assumptions, we can do a quick back-of-the-envelope calculation to get a good limit of:\nlimit = current_load_per_day * expected_growth * days/month      = 130,000 * 2 * 30      = 7,800,000 traces / month\nWe click on the edit icon on the right side of the table for our Prod row, and can enter this limit as follows:'),
 Document(id='33fd57f3-11be-42fe-bff9-4d46df