### Simple GenAI App using LangChain

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
# LangSmith Tracking
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")

### Data Ingestion - Scraping Data from the website

In [3]:
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://docs.langchain.com/langsmith/billing")
loader

USER_AGENT environment variable not set, consider setting it to identify your requests.


<langchain_community.document_loaders.web_base.WebBaseLoader at 0x2889bfbdbe0>

In [4]:
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/billing', 'title': 'Manage billing in your account - Docs by LangChain', 'language': 'en'}, page_content='Manage billing in your account - Docs by LangChainSkip to main contentDocs by LangChain home pageLangSmithSearch...⌘KSupportGitHubTry LangSmithTry LangSmithSearch...NavigationAccount administrationManage billing in your accountGet startedObservabilityEvaluationPrompt engineeringDeploymentPlatform setupReferenceOverviewPlansCreate an account and API keyAccount administrationOverviewSet up hierarchyWorkload isolationManage organizations using the APIManage billingSet up resource tagsUser managementAdditional resourcesPolly (Beta)Data managementAccess control & AuthenticationScalability & resilienceFAQsRegions FAQPricing FAQLangSmith statusOn this pageSet up billing for your accountDeveloper plan: set up billing on your personal organizationPlus plan: set up billing on a shared organizationUpdate your information (Pai

### Steps
- Load Data
- Docs
- Divide text into chunks
- Convert to vectors using Embedding
- Store in VectorStore DB

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/billing', 'title': 'Manage billing in your account - Docs by LangChain', 'language': 'en'}, page_content='Manage billing in your account - Docs by LangChainSkip to main contentDocs by LangChain home pageLangSmithSearch...⌘KSupportGitHubTry LangSmithTry LangSmithSearch...NavigationAccount administrationManage billing in your accountGet startedObservabilityEvaluationPrompt engineeringDeploymentPlatform setupReferenceOverviewPlansCreate an account and API keyAccount administrationOverviewSet up hierarchyWorkload isolationManage organizations using the APIManage billingSet up resource tagsUser managementAdditional resourcesPolly (Beta)Data managementAccess control & AuthenticationScalability & resilienceFAQsRegions FAQPricing FAQLangSmith statusOn this pageSet up billing for your accountDeveloper plan: set up billing on your personal organizationPlus plan: set up billing on a shared organizationUpdate your information (Pai

In [6]:
from langchain_ollama import OllamaEmbeddings
embeddings = OllamaEmbeddings(model="llama3.2")
embeddings

OllamaEmbeddings(model='llama3.2', validate_model_on_init=False, base_url=None, client_kwargs={}, async_client_kwargs={}, sync_client_kwargs={}, mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, keep_alive=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None)

In [7]:
from langchain_community.vectorstores import FAISS
vector_store = FAISS.from_documents(documents, embedding=embeddings)
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x2889bfbf0e0>

In [8]:
query = "LangSmith charges for deployed agents"
response = vector_store.similarity_search(query=query)
response[0].page_content

'LangSmith Traces (Base Charge): tracks all traces that you send to LangSmith.\nLangSmith Traces (Extended Data Retention Upgrades): tracks all traces that also have our Extended 400 Day Data Retention.\nLangSmith Deployment Runs: tracks end-to-end invocations of deployed LangGraph agents.\n\nFor more details on traces, refer to the data retention conceptual docs. For more details on Deployment Runs, refer to LangSmith Deployment billing.\n\u200bInvoices\nTo understand how your usage translates to spend, navigate to the Invoices tab. The first invoice that will appear on screen is a draft of your current month’s invoice, which shows your running spend thus far this month.\nLangSmith’s Usage Graph and Invoice use the term tenant_id to refer to a workspace ID. They are interchangeable.\n\u200bSet limits on usage\n\n\u200bSet spend limit for workspace'

In [9]:
from langchain_ollama import ChatOllama
llm = ChatOllama(model="llama3.2")
llm

ChatOllama(model='llama3.2')

In [10]:
from langchain_classic.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
    Answer the following questions based only on the provided context:
    <context>
    {context}
    </context>
    """
)

document_chain = create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following questions based only on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatOllama(model='llama3.2')
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [11]:
from langchain_core.documents import Document
document_chain.invoke(
    {
        "input" : "What is Langsmith Deployment Billing?",
        "context" : [Document(page_content="LangSmith charges for deployed agents via LangSmith Deployment (formerly LangGraph Platform).")] 
    }
)

"There is no additional context to answer specific questions. However, based on the provided context:\n\nLangSmith offers a deployment service called LangSmith Deployment (formerly LangGraph Platform), but it doesn't provide information about charges for deployed agents beyond this general statement."

### Retriever

In [14]:
retriever = vector_store.as_retriever()
from langchain_classic.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever, document_chain)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002889BFBF0E0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following questions based only on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})

In [15]:
response = retrieval_chain.invoke({"input" : "LangSmith charges for deployed agents"})
response['answer']

'I\'ll answer the questions based only on the provided context:\n\n\n1. How does LangSmith Traces (Extended Data Retention Upgrades) track data?\n\nLangSmith Traces (Extended Data Retention Upgrades) tracks all traces that also have our Extended 400 Day Data Retention.\n\n2. What is the term used to refer to a workspace ID in LangSmith\'s Usage Graph and Invoice?\n\nThe term "tenant_id" is used interchangeably with "workspace ID".\n\n3. How do I set spend limits on my usage?\n\nYou can set spend limits by navigating to the Billing and Usage page under Settings, selecting a country that requires a Tax ID field, entering necessary information, saving changes, and then enforcing spend limits.\n\n4. What are the special instructions for setting up billing on legacy accounts?\n\nThere are no specific instructions mentioned in the provided context.\n\n5. How many traces can personal organizations use per month until a credit card is added?\n\nPersonal organizations are limited to 5,000 trace